[Lldb-commits] [lldb] 73a6a16 - Revert "Reapply Revert "RegAllocFast: Rewrite and improve""
Muhammad Omair Javaid via lldb-commits
lldb-commits at lists.llvm.org
Tue Sep 22 02:40:29 PDT 2020
Author: Muhammad Omair Javaid
Date: 2020-09-22T14:40:06+05:00
New Revision: 73a6a164b84a8195defbb8f5eeb6faecfc478ad4
URL: https://github.com/llvm/llvm-project/commit/73a6a164b84a8195defbb8f5eeb6faecfc478ad4
DIFF: https://github.com/llvm/llvm-project/commit/73a6a164b84a8195defbb8f5eeb6faecfc478ad4.diff
LOG: Revert "Reapply Revert "RegAllocFast: Rewrite and improve""
This reverts commit 55f9f87da2c2ad791b9e62cccb1c035e037444fa.
Breaks the following buildbots:
http://lab.llvm.org:8011/builders/lldb-arm-ubuntu/builds/4306
http://lab.llvm.org:8011/builders/lldb-aarch64-ubuntu/builds/9154
Added:
Modified:
lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
llvm/lib/CodeGen/RegAllocFast.cpp
llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll
llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
llvm/test/CodeGen/AArch64/arm64_32-null.ll
llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
llvm/test/CodeGen/AArch64/combine-loads.ll
llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
llvm/test/CodeGen/AArch64/popcount.ll
llvm/test/CodeGen/AArch64/swift-return.ll
llvm/test/CodeGen/AArch64/swifterror.ll
llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
llvm/test/CodeGen/AArch64/unwind-preserved.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
llvm/test/CodeGen/AMDGPU/spill-agpr.mir
llvm/test/CodeGen/AMDGPU/spill-m0.ll
llvm/test/CodeGen/AMDGPU/spill192.mir
llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
llvm/test/CodeGen/ARM/Windows/alloca.ll
llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll
llvm/test/CodeGen/ARM/cmpxchg-O0.ll
llvm/test/CodeGen/ARM/crash-greedy-v6.ll
llvm/test/CodeGen/ARM/debug-info-blocks.ll
llvm/test/CodeGen/ARM/fast-isel-call.ll
llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
llvm/test/CodeGen/ARM/fast-isel-select.ll
llvm/test/CodeGen/ARM/fast-isel-vararg.ll
llvm/test/CodeGen/ARM/ldrd.ll
llvm/test/CodeGen/ARM/legalize-bitcast.ll
llvm/test/CodeGen/ARM/pr47454.ll
llvm/test/CodeGen/ARM/stack-guard-reassign.ll
llvm/test/CodeGen/ARM/swifterror.ll
llvm/test/CodeGen/ARM/thumb-big-stack.ll
llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll
llvm/test/CodeGen/Mips/atomic-min-max.ll
llvm/test/CodeGen/Mips/atomic.ll
llvm/test/CodeGen/Mips/atomic64.ll
llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
llvm/test/CodeGen/Mips/copy-fp64.ll
llvm/test/CodeGen/Mips/implicit-sret.ll
llvm/test/CodeGen/Mips/micromips-eva.mir
llvm/test/CodeGen/Mips/msa/ldr_str.ll
llvm/test/CodeGen/PowerPC/addegluecrash.ll
llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll
llvm/test/CodeGen/PowerPC/aix-overflow-toc.py
llvm/test/CodeGen/PowerPC/anon_aggr.ll
llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
llvm/test/CodeGen/PowerPC/elf-common.ll
llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll
llvm/test/CodeGen/PowerPC/fp64-to-int16.ll
llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
llvm/test/CodeGen/PowerPC/popcount.ll
llvm/test/CodeGen/PowerPC/spill-nor0.ll
llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll
llvm/test/CodeGen/PowerPC/vsx-args.ll
llvm/test/CodeGen/PowerPC/vsx.ll
llvm/test/CodeGen/SPARC/fp16-promote.ll
llvm/test/CodeGen/SystemZ/swift-return.ll
llvm/test/CodeGen/SystemZ/swifterror.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll
llvm/test/CodeGen/Thumb2/high-reg-spill.mir
llvm/test/CodeGen/Thumb2/mve-vector-spill.ll
llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
llvm/test/CodeGen/X86/atomic-monotonic.ll
llvm/test/CodeGen/X86/atomic-unordered.ll
llvm/test/CodeGen/X86/atomic32.ll
llvm/test/CodeGen/X86/atomic64.ll
llvm/test/CodeGen/X86/atomic6432.ll
llvm/test/CodeGen/X86/avx-load-store.ll
llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
llvm/test/CodeGen/X86/crash-O0.ll
llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll
llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll
llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
llvm/test/CodeGen/X86/fast-isel-select-sse.ll
llvm/test/CodeGen/X86/fast-isel-select.ll
llvm/test/CodeGen/X86/fast-isel-x86-64.ll
llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll
llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
llvm/test/CodeGen/X86/pr11415.ll
llvm/test/CodeGen/X86/pr1489.ll
llvm/test/CodeGen/X86/pr27591.ll
llvm/test/CodeGen/X86/pr30430.ll
llvm/test/CodeGen/X86/pr30813.ll
llvm/test/CodeGen/X86/pr32241.ll
llvm/test/CodeGen/X86/pr32284.ll
llvm/test/CodeGen/X86/pr32340.ll
llvm/test/CodeGen/X86/pr32345.ll
llvm/test/CodeGen/X86/pr32451.ll
llvm/test/CodeGen/X86/pr32484.ll
llvm/test/CodeGen/X86/pr34592.ll
llvm/test/CodeGen/X86/pr34653.ll
llvm/test/CodeGen/X86/pr39733.ll
llvm/test/CodeGen/X86/pr42452.ll
llvm/test/CodeGen/X86/pr44749.ll
llvm/test/CodeGen/X86/pr47000.ll
llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir
llvm/test/CodeGen/X86/stack-protector-msvc.ll
llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll
llvm/test/CodeGen/X86/swift-return.ll
llvm/test/CodeGen/X86/swifterror.ll
llvm/test/CodeGen/X86/volatile.ll
llvm/test/CodeGen/X86/win64_eh.ll
llvm/test/CodeGen/X86/x86-32-intrcc.ll
llvm/test/CodeGen/X86/x86-64-intrcc.ll
llvm/test/DebugInfo/AArch64/frameindices.ll
llvm/test/DebugInfo/AArch64/prologue_end.ll
llvm/test/DebugInfo/ARM/prologue_end.ll
llvm/test/DebugInfo/Mips/delay-slot.ll
llvm/test/DebugInfo/Mips/prologue_end.ll
llvm/test/DebugInfo/X86/dbg-declare-arg.ll
llvm/test/DebugInfo/X86/fission-ranges.ll
llvm/test/DebugInfo/X86/op_deref.ll
llvm/test/DebugInfo/X86/parameters.ll
llvm/test/DebugInfo/X86/pieces-1.ll
llvm/test/DebugInfo/X86/prologue-stack.ll
llvm/test/DebugInfo/X86/reference-argument.ll
llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll
llvm/test/DebugInfo/X86/sret.ll
llvm/test/DebugInfo/X86/subreg.ll
Removed:
llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir
llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir
llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir
llvm/test/CodeGen/PowerPC/spill-nor0.mir
llvm/test/CodeGen/X86/bug47278-eflags-error.mir
llvm/test/CodeGen/X86/bug47278.mir
################################################################################
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
index 8d101ba280e8..be0575541a62 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
@@ -28,9 +28,11 @@ int main(int argc, char **argv) {
// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+17>: mov dword ptr [rsp + 0x24], ecx
// CHECK: ** 15 foo();
// CHECK: disassembly.cpp.tmp.exe[{{.*}}] <+21>: call {{.*}} ; foo at disassembly.cpp:12
-// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+26>: xor eax, eax
+// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+26>: xor ecx, ecx
+// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+28>: mov dword ptr [rsp + 0x20], eax
// CHECK: ** 16 return 0;
// CHECK-NEXT: 17 }
// CHECK-NEXT: 18
-// CHECK: disassembly.cpp.tmp.exe[{{.*}}] <+28>: add rsp, 0x38
-// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+32>: ret
+// CHECK: disassembly.cpp.tmp.exe[{{.*}}] <+32>: mov eax, ecx
+// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+34>: add rsp, 0x38
+// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+38>: ret
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index cfee1a77d6b8..68308c6e1d4b 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -56,10 +56,6 @@ STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads , "Number of loads added");
STATISTIC(NumCoalesced, "Number of copies coalesced");
-// FIXME: Remove this switch when all testcases are fixed!
-static cl::opt<bool> IgnoreMissingDefs("rafast-ignore-missing-defs",
- cl::Hidden);
-
static RegisterRegAlloc
fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
@@ -89,9 +85,8 @@ namespace {
MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
Register VirtReg; ///< Virtual register number.
MCPhysReg PhysReg = 0; ///< Currently held here.
- bool LiveOut = false; ///< Register is possibly live out.
- bool Reloaded = false; ///< Register was reloaded.
- bool Error = false; ///< Could not allocate.
+ unsigned short LastOpNum = 0; ///< OpNum on LastUse.
+ bool Dirty = false; ///< Register needs spill.
explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
@@ -106,9 +101,6 @@ namespace {
LiveRegMap LiveVirtRegs;
DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
- /// List of DBG_VALUE that we encountered without the vreg being assigned
- /// because they were placed after the last use of the vreg.
- DenseMap<unsigned, SmallVector<MachineInstr *, 1>> DanglingDbgValues;
/// Has a bit set for every virtual register for which it was determined
/// that it is alive across blocks.
@@ -120,13 +112,9 @@ namespace {
/// immediately without checking aliases.
regFree,
- /// A pre-assigned register has been assigned before register allocation
- /// (e.g., setting up a call parameter).
- regPreAssigned,
-
- /// Used temporarily in reloadAtBegin() to mark register units that are
- /// live-in to the basic block.
- regLiveIn,
+ /// A reserved register has been assigned explicitly (e.g., setting up a
+ /// call parameter), and it remains reserved until it is used.
+ regReserved
/// A register state may also be a virtual register number, indication
/// that the physical register is currently allocated to a virtual
@@ -136,17 +124,15 @@ namespace {
/// Maps each physical register to a RegUnitState enum or virtual register.
std::vector<unsigned> RegUnitStates;
+ SmallVector<Register, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
/// Set of register units that are used in the current instruction, and so
/// cannot be allocated.
RegUnitSet UsedInInstr;
- RegUnitSet PhysRegUses;
- SmallVector<uint16_t, 8> DefOperandIndexes;
void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
- bool isPhysRegFree(MCPhysReg PhysReg) const;
/// Mark a physreg as used in this instruction.
void markRegUsedInInstr(MCPhysReg PhysReg) {
@@ -155,29 +141,13 @@ namespace {
}
/// Check if a physreg or any of its aliases are used in this instruction.
- bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ bool isRegUsedInInstr(MCPhysReg PhysReg) const {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
if (UsedInInstr.count(*Units))
return true;
- if (LookAtPhysRegUses && PhysRegUses.count(*Units))
- return true;
- }
return false;
}
- /// Mark physical register as being used in a register use operand.
- /// This is only used by the special livethrough handling code.
- void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- PhysRegUses.insert(*Units);
- }
-
- /// Remove mark of physical register being used in the instruction.
- void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- UsedInInstr.erase(*Units);
- }
-
enum : unsigned {
spillClean = 50,
spillDirty = 100,
@@ -207,21 +177,27 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
void allocateBasicBlock(MachineBasicBlock &MBB);
-
- void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
- Register Reg) const;
-
void allocateInstruction(MachineInstr &MI);
void handleDebugValue(MachineInstr &MI);
+ void handleThroughOperands(MachineInstr &MI,
+ SmallVectorImpl<Register> &VirtDead);
+ bool isLastUseOfLocalReg(const MachineOperand &MO) const;
+
+ void addKillFlag(const LiveReg &LRI);
#ifndef NDEBUG
bool verifyRegStateMapping(const LiveReg &LR) const;
#endif
- bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
- bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
- bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
- void freePhysReg(MCPhysReg PhysReg);
+ void killVirtReg(LiveReg &LR);
+ void killVirtReg(Register VirtReg);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
+ void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg);
+
+ void usePhysReg(MachineOperand &MO);
+ void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
+ unsigned NewState);
unsigned calcSpillCost(MCPhysReg PhysReg) const;
+ void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
LiveRegMap::iterator findLiveVirtReg(Register VirtReg) {
return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
@@ -231,24 +207,14 @@ namespace {
return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
- void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg);
- void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint,
- bool LookAtPhysRegUses = false);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint);
void allocVirtRegUndef(MachineOperand &MO);
- void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg,
- MCPhysReg Reg);
- void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
- Register VirtReg);
- void defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
- bool LookAtPhysRegUses = false);
- void useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg);
-
- MachineBasicBlock::iterator
- getMBBBeginInsertionPoint(MachineBasicBlock &MBB,
- SmallSet<Register, 2> &PrologLiveIns) const;
-
- void reloadAtBegin(MachineBasicBlock &MBB);
- void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+ MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ Register Hint);
+ LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ Register Hint);
+ void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut);
+ bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
Register traceCopies(Register VirtReg) const;
Register traceCopyChain(Register Reg) const;
@@ -277,14 +243,6 @@ void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
RegUnitStates[*UI] = NewState;
}
-bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- if (RegUnitStates[*UI] != regFree)
- return false;
- }
- return true;
-}
-
/// This allocates space for the specified virtual register to be held on the
/// stack.
int RegAllocFast::getStackSpaceFor(Register VirtReg) {
@@ -342,7 +300,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
// block.
static const unsigned Limit = 8;
unsigned C = 0;
- for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
+ for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) {
if (UseInst.getParent() != MBB || ++C >= Limit) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
// Cannot be live-out if there are no successors.
@@ -394,19 +352,15 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI);
++NumStores;
- // When we spill a virtual register, we will have spill instructions behind
- // every definition of it, meaning we can switch all the DBG_VALUEs over
- // to just reference the stack slot.
+ // If this register is used by DBG_VALUE then insert new DBG_VALUE to
+ // identify spilled location as the place to find corresponding variable's
+ // value.
SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg];
for (MachineInstr *DBG : LRIDbgValues) {
MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI);
assert(NewDV->getParent() == MBB && "dangling parent pointer");
(void)NewDV;
LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV);
- // Rewrite unassigned dbg_values to use the stack slot.
- MachineOperand &MO = DBG->getOperand(0);
- if (MO.isReg() && MO.getReg() == 0)
- updateDbgValueForSpill(*DBG, FI);
}
// Now this register is spilled there is should not be any DBG_VALUE
// pointing to this register because they are all pointing to spilled value
@@ -425,75 +379,113 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
++NumLoads;
}
-/// Get basic block begin insertion point.
-/// This is not just MBB.begin() because surprisingly we have EH_LABEL
-/// instructions marking the begin of a basic block. This means we must insert
-/// new instructions after such labels...
-MachineBasicBlock::iterator
-RegAllocFast::getMBBBeginInsertionPoint(
- MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const {
- MachineBasicBlock::iterator I = MBB.begin();
- while (I != MBB.end()) {
- if (I->isLabel()) {
- ++I;
- continue;
- }
-
- // Most reloads should be inserted after prolog instructions.
- if (!TII->isBasicBlockPrologue(*I))
- break;
+/// Return true if MO is the only remaining reference to its virtual register,
+/// and it is guaranteed to be a block-local register.
+bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const {
+ // If the register has ever been spilled or reloaded, we conservatively assume
+ // it is a global register used in multiple blocks.
+ if (StackSlotForVirtReg[MO.getReg()] != -1)
+ return false;
+
+ // Check that the use/def chain has exactly one operand - MO.
+ MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
+ if (&*I != &MO)
+ return false;
+ return ++I == MRI->reg_nodbg_end();
+}
- // However if a prolog instruction reads a register that needs to be
- // reloaded, the reload should be inserted before the prolog.
- for (MachineOperand &MO : I->operands()) {
- if (MO.isReg())
- PrologLiveIns.insert(MO.getReg());
- }
+/// Set kill flags on last use of a virtual register.
+void RegAllocFast::addKillFlag(const LiveReg &LR) {
+ if (!LR.LastUse) return;
+ MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
+ if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
+ if (MO.getReg() == LR.PhysReg)
+ MO.setIsKill();
+ // else, don't do anything we are probably redefining a
+ // subreg of this register and given we don't track which
+ // lanes are actually dead, we cannot insert a kill flag here.
+ // Otherwise we may end up in a situation like this:
+ // ... = (MO) physreg:sub1, implicit killed physreg
+ // ... <== Here we would allow later pass to reuse physreg:sub1
+ // which is potentially wrong.
+ // LR:sub0 = ...
+ // ... = LR.sub1 <== This is going to use physreg:sub1
+ }
+}
- ++I;
+#ifndef NDEBUG
+bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const {
+ for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) {
+ if (RegUnitStates[*UI] != LR.VirtReg)
+ return false;
}
- return I;
+ return true;
}
+#endif
-/// Reload all currently assigned virtual registers.
-void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
- if (LiveVirtRegs.empty())
- return;
+/// Mark virtreg as no longer available.
+void RegAllocFast::killVirtReg(LiveReg &LR) {
+ assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
+ addKillFlag(LR);
+ MCPhysReg PhysReg = LR.PhysReg;
+ setPhysRegState(PhysReg, regFree);
+ LR.PhysReg = 0;
+}
- for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) {
- MCPhysReg Reg = P.PhysReg;
- // Set state to live-in. This possibly overrides mappings to virtual
- // registers but we don't care anymore at this point.
- setPhysRegState(Reg, regLiveIn);
- }
+/// Mark virtreg as no longer available.
+void RegAllocFast::killVirtReg(Register VirtReg) {
+ assert(Register::isVirtualRegister(VirtReg) &&
+ "killVirtReg needs a virtual register");
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
+ killVirtReg(*LRI);
+}
+/// This method spills the value specified by VirtReg into the corresponding
+/// stack slot if needed.
+void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
+ Register VirtReg) {
+ assert(Register::isVirtualRegister(VirtReg) &&
+ "Spilling a physical register is illegal!");
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Spilling unmapped virtual register");
+ spillVirtReg(MI, *LRI);
+}
+
+/// Do the actual work of spilling.
+void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
+ assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
+
+ MCPhysReg PhysReg = LR.PhysReg;
+
+ if (LR.Dirty) {
+ // If this physreg is used by the instruction, we want to kill it on the
+ // instruction, not on the spill.
+ bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
+ LR.Dirty = false;
- SmallSet<Register, 2> PrologLiveIns;
+ spill(MI, LR.VirtReg, PhysReg, SpillKill);
+ if (SpillKill)
+ LR.LastUse = nullptr; // Don't kill register again
+ }
+ killVirtReg(LR);
+}
+
+/// Spill all dirty virtregs without killing them.
+void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
+ if (LiveVirtRegs.empty())
+ return;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
// of spilling here is deterministic, if arbitrary.
- MachineBasicBlock::iterator InsertBefore
- = getMBBBeginInsertionPoint(MBB, PrologLiveIns);
- for (const LiveReg &LR : LiveVirtRegs) {
- MCPhysReg PhysReg = LR.PhysReg;
- if (PhysReg == 0)
+ for (LiveReg &LR : LiveVirtRegs) {
+ if (!LR.PhysReg)
continue;
-
- unsigned FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
- if (RegUnitStates[FirstUnit] == regLiveIn)
+ if (OnlyLiveOut && !mayLiveOut(LR.VirtReg))
continue;
-
- assert((&MBB != &MBB.getParent()->front() || IgnoreMissingDefs) &&
- "no reload in start block. Missing vreg def?");
-
- if (PrologLiveIns.count(PhysReg)) {
- // FIXME: Theoretically this should use an insert point skipping labels
- // but I'm not sure how labels should interact with prolog instruction
- // that need reloads.
- reload(MBB.begin(), LR.VirtReg, PhysReg);
- } else
- reload(InsertBefore, LR.VirtReg, PhysReg);
+ spillVirtReg(MI, LR);
}
LiveVirtRegs.clear();
}
@@ -501,74 +493,51 @@ void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
/// Handle the direct use of a physical register. Check that the register is
/// not used by a virtreg. Kill the physreg, marking it free. This may add
/// implicit kills to MO->getParent() and invalidate MO.
-bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) {
- assert(Register::isPhysicalRegister(Reg) && "expected physreg");
- bool displacedAny = displacePhysReg(MI, Reg);
- setPhysRegState(Reg, regPreAssigned);
- markRegUsedInInstr(Reg);
- return displacedAny;
-}
+void RegAllocFast::usePhysReg(MachineOperand &MO) {
+ // Ignore undef uses.
+ if (MO.isUndef())
+ return;
-bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) {
- bool displacedAny = displacePhysReg(MI, Reg);
- setPhysRegState(Reg, regPreAssigned);
- return displacedAny;
+ Register PhysReg = MO.getReg();
+ assert(PhysReg.isPhysical() && "Bad usePhysReg operand");
+
+ markRegUsedInInstr(PhysReg);
+
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ switch (RegUnitStates[*UI]) {
+ case regReserved:
+ RegUnitStates[*UI] = regFree;
+ LLVM_FALLTHROUGH;
+ case regFree:
+ break;
+ default:
+ llvm_unreachable("Unexpected reg unit state");
+ }
+ }
+
+ // All aliases are disabled, bring register into working set.
+ setPhysRegState(PhysReg, regFree);
+ MO.setIsKill();
}
/// Mark PhysReg as reserved or free after spilling any virtregs. This is very
/// similar to defineVirtReg except the physreg is reserved instead of
/// allocated.
-bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
- bool displacedAny = false;
-
+void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
+ MCPhysReg PhysReg, unsigned NewState) {
for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- unsigned Unit = *UI;
- switch (unsigned VirtReg = RegUnitStates[Unit]) {
- default: {
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && "datastructures in sync");
- MachineBasicBlock::iterator ReloadBefore =
- std::next((MachineBasicBlock::iterator)MI.getIterator());
- reload(ReloadBefore, VirtReg, LRI->PhysReg);
-
- setPhysRegState(LRI->PhysReg, regFree);
- LRI->PhysReg = 0;
- LRI->Reloaded = true;
- displacedAny = true;
- break;
- }
- case regPreAssigned:
- RegUnitStates[Unit] = regFree;
- displacedAny = true;
+ switch (unsigned VirtReg = RegUnitStates[*UI]) {
+ default:
+ spillVirtReg(MI, VirtReg);
break;
case regFree:
+ case regReserved:
break;
}
}
- return displacedAny;
-}
-
-void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
- LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':');
- unsigned FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
- switch (unsigned VirtReg = RegUnitStates[FirstUnit]) {
- case regFree:
- LLVM_DEBUG(dbgs() << '\n');
- return;
- case regPreAssigned:
- LLVM_DEBUG(dbgs() << '\n');
- setPhysRegState(PhysReg, regFree);
- return;
- default: {
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end());
- LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n');
- setPhysRegState(LRI->PhysReg, regFree);
- LRI->PhysReg = 0;
- }
- return;
- }
+ markRegUsedInInstr(PhysReg);
+ setPhysRegState(PhysReg, NewState);
}
/// Return the cost of spilling clearing out PhysReg and aliases so it is free
@@ -576,61 +545,35 @@ void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
/// disabled - it can be allocated directly.
/// \returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
+ if (isRegUsedInInstr(PhysReg)) {
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
+ << " is already used in instr.\n");
+ return spillImpossible;
+ }
+
for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
switch (unsigned VirtReg = RegUnitStates[*UI]) {
case regFree:
break;
- case regPreAssigned:
- LLVM_DEBUG(dbgs() << "Cannot spill pre-assigned "
- << printReg(PhysReg, TRI) << '\n');
+ case regReserved:
+ LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
+ << printReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default: {
- bool SureSpill = StackSlotForVirtReg[VirtReg] != -1 ||
- findLiveVirtReg(VirtReg)->LiveOut;
- return SureSpill ? spillClean : spillDirty;
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ return LRI->Dirty ? spillDirty : spillClean;
}
}
}
return 0;
}
-void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
- Register VirtReg, MCPhysReg Reg) {
- auto UDBGValIter = DanglingDbgValues.find(VirtReg);
- if (UDBGValIter == DanglingDbgValues.end())
- return;
-
- SmallVectorImpl<MachineInstr*> &Dangling = UDBGValIter->second;
- for (MachineInstr *DbgValue : Dangling) {
- assert(DbgValue->isDebugValue());
- MachineOperand &MO = DbgValue->getOperand(0);
- if (!MO.isReg())
- continue;
-
- // Test whether the physreg survives from the definition to the DBG_VALUE.
- MCPhysReg SetToReg = Reg;
- unsigned Limit = 20;
- for (MachineBasicBlock::iterator I = std::next(Definition.getIterator()),
- E = DbgValue->getIterator(); I != E; ++I) {
- if (I->modifiesRegister(Reg, TRI) || --Limit == 0) {
- LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
- << '\n');
- SetToReg = 0;
- break;
- }
- }
- MO.setReg(SetToReg);
- if (SetToReg != 0)
- MO.setIsRenamable();
- }
- Dangling.clear();
-}
-
/// This method updates local state so that we know that PhysReg is the
/// proper container for VirtReg now. The physical register must not be used
/// for anything else when this is called.
-void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR,
- MCPhysReg PhysReg) {
+void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
Register VirtReg = LR.VirtReg;
LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
<< printReg(PhysReg, TRI) << '\n');
@@ -638,8 +581,6 @@ void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR,
assert(PhysReg != 0 && "Trying to assign no register");
LR.PhysReg = PhysReg;
setPhysRegState(PhysReg, VirtReg);
-
- assignDanglingDebugValues(AtMI, VirtReg, PhysReg);
}
static bool isCoalescable(const MachineInstr &MI) {
@@ -683,10 +624,11 @@ Register RegAllocFast::traceCopies(Register VirtReg) const {
}
/// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
- Register Hint0, bool LookAtPhysRegUses) {
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
const Register VirtReg = LR.VirtReg;
- assert(LR.PhysReg == 0);
+
+ assert(Register::isVirtualRegister(VirtReg) &&
+ "Can only allocate virtual registers");
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
@@ -694,36 +636,41 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
<< " with hint " << printReg(Hint0, TRI) << '\n');
// Take hint when possible.
- if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0) &&
- !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) {
- // Take hint if the register is currently free.
- if (isPhysRegFree(Hint0)) {
+ if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) &&
+ RC.contains(Hint0)) {
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint0);
+ if (Cost < spillDirty) {
LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
<< '\n');
- assignVirtToPhysReg(MI, LR, Hint0);
+ if (Cost)
+ definePhysReg(MI, Hint0, regFree);
+ assignVirtToPhysReg(LR, Hint0);
return;
} else {
- LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint0, TRI)
- << " occupied\n");
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
+ << "occupied\n");
}
} else {
Hint0 = Register();
}
-
// Try other hint.
Register Hint1 = traceCopies(VirtReg);
- if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
- !isRegUsedInInstr(Hint1, LookAtPhysRegUses)) {
- // Take hint if the register is currently free.
- if (isPhysRegFree(Hint1)) {
+ if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) &&
+ RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) {
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint1);
+ if (Cost < spillDirty) {
LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
- << '\n');
- assignVirtToPhysReg(MI, LR, Hint1);
+ << '\n');
+ if (Cost)
+ definePhysReg(MI, Hint1, regFree);
+ assignVirtToPhysReg(LR, Hint1);
return;
} else {
- LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI)
- << " occupied\n");
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
+ << "occupied\n");
}
} else {
Hint1 = Register();
@@ -734,20 +681,15 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
for (MCPhysReg PhysReg : AllocationOrder) {
LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
- if (isRegUsedInInstr(PhysReg, LookAtPhysRegUses)) {
- LLVM_DEBUG(dbgs() << "already used in instr.\n");
- continue;
- }
-
unsigned Cost = calcSpillCost(PhysReg);
LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n');
// Immediate take a register with cost 0.
if (Cost == 0) {
- assignVirtToPhysReg(MI, LR, PhysReg);
+ assignVirtToPhysReg(LR, PhysReg);
return;
}
- if (PhysReg == Hint0 || PhysReg == Hint1)
+ if (PhysReg == Hint1 || PhysReg == Hint0)
Cost -= spillPrefBonus;
if (Cost < BestCost) {
@@ -763,14 +705,13 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
MI.emitError("inline assembly requires more registers than available");
else
MI.emitError("ran out of registers during register allocation");
-
- LR.Error = true;
- LR.PhysReg = 0;
+ definePhysReg(MI, *AllocationOrder.begin(), regFree);
+ assignVirtToPhysReg(LR, *AllocationOrder.begin());
return;
}
- displacePhysReg(MI, BestReg);
- assignVirtToPhysReg(MI, LR, BestReg);
+ definePhysReg(MI, BestReg, regFree);
+ assignVirtToPhysReg(LR, BestReg);
}
void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
@@ -798,166 +739,212 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
MO.setIsRenamable(true);
}
-/// Variation of defineVirtReg() with special handling for livethrough regs
-/// (tied or earlyclobber) that may interfere with preassigned uses.
-void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
- Register VirtReg) {
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- if (LRI != LiveVirtRegs.end()) {
- MCPhysReg PrevReg = LRI->PhysReg;
- if (PrevReg != 0 && isRegUsedInInstr(PrevReg, true)) {
- LLVM_DEBUG(dbgs() << "Need new assignment for " << printReg(PrevReg, TRI)
- << " (tied/earlyclobber resolution)\n");
- freePhysReg(PrevReg);
- LRI->PhysReg = 0;
- allocVirtReg(MI, *LRI, 0, true);
- MachineBasicBlock::iterator InsertBefore =
- std::next((MachineBasicBlock::iterator)MI.getIterator());
- LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to "
- << printReg(PrevReg, TRI) << '\n');
- BuildMI(*MBB, InsertBefore, MI.getDebugLoc(),
- TII->get(TargetOpcode::COPY), PrevReg)
- .addReg(LRI->PhysReg, llvm::RegState::Kill);
- }
- MachineOperand &MO = MI.getOperand(OpNum);
- if (MO.getSubReg() && !MO.isUndef()) {
- LRI->LastUse = &MI;
- }
- }
- return defineVirtReg(MI, OpNum, VirtReg, true);
-}
-
-/// Allocates a register for VirtReg definition. Typically the register is
-/// already assigned from a use of the virtreg, however we still need to
-/// perform an allocation if:
-/// - It is a dead definition without any uses.
-/// - The value is live out and all uses are in different basic blocks.
-void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
- Register VirtReg, bool LookAtPhysRegUses) {
- assert(VirtReg.isVirtual() && "Not a virtual register");
- MachineOperand &MO = MI.getOperand(OpNum);
+/// Allocates a register for VirtReg and mark it as dirty.
+MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg, Register Hint) {
+ assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- if (New) {
- if (!MO.isDead()) {
- if (mayLiveOut(VirtReg)) {
- LRI->LiveOut = true;
- } else {
- // It is a dead def without the dead flag; add the flag now.
- MO.setIsDead(true);
- }
- }
- }
- if (LRI->PhysReg == 0)
- allocVirtReg(MI, *LRI, 0, LookAtPhysRegUses);
- else {
- assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) &&
- "TODO: preassign mismatch");
- LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI)
- << " use existing assignment to "
- << printReg(LRI->PhysReg, TRI) << '\n');
- }
-
- MCPhysReg PhysReg = LRI->PhysReg;
- assert(PhysReg != 0 && "Register not assigned");
- if (LRI->Reloaded || LRI->LiveOut) {
- if (!MI.isImplicitDef()) {
- MachineBasicBlock::iterator SpillBefore =
- std::next((MachineBasicBlock::iterator)MI.getIterator());
- LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: "
- << LRI->Reloaded << '\n');
- bool Kill = LRI->LastUse == nullptr;
- spill(SpillBefore, VirtReg, PhysReg, Kill);
- LRI->LastUse = nullptr;
+ if (!LRI->PhysReg) {
+ // If there is no hint, peek at the only use of this register.
+ if ((!Hint || !Hint.isPhysical()) &&
+ MRI->hasOneNonDBGUse(VirtReg)) {
+ const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg);
+ // It's a copy, use the destination register as a hint.
+ if (UseMI.isCopyLike())
+ Hint = UseMI.getOperand(0).getReg();
}
- LRI->LiveOut = false;
- LRI->Reloaded = false;
+ allocVirtReg(MI, *LRI, Hint);
+ } else if (LRI->LastUse) {
+ // Redefining a live register - kill at the last use, unless it is this
+ // instruction defining VirtReg multiple times.
+ if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
+ addKillFlag(*LRI);
}
- markRegUsedInInstr(PhysReg);
- setPhysReg(MI, MO, PhysReg);
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = &MI;
+ LRI->LastOpNum = OpNum;
+ LRI->Dirty = true;
+ markRegUsedInInstr(LRI->PhysReg);
+ return LRI->PhysReg;
}
-/// Allocates a register for a VirtReg use.
-void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
- Register VirtReg) {
- assert(VirtReg.isVirtual() && "Not a virtual register");
- MachineOperand &MO = MI.getOperand(OpNum);
+/// Make sure VirtReg is available in a physreg and return it.
+RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ Register VirtReg,
+ Register Hint) {
+ assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- if (New) {
- MachineOperand &MO = MI.getOperand(OpNum);
- if (!MO.isKill()) {
- if (mayLiveOut(VirtReg)) {
- LRI->LiveOut = true;
- } else {
- // It is a last (killing) use without the kill flag; add the flag now.
- MO.setIsKill(true);
- }
- }
- } else {
- assert((!MO.isKill() || LRI->LastUse == &MI) && "Invalid kill flag");
- }
-
- // If necessary allocate a register.
- if (LRI->PhysReg == 0) {
- assert(!MO.isTied() && "tied op should be allocated");
- Register Hint;
- if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) {
- Hint = MI.getOperand(0).getReg();
- assert(Hint.isPhysical() &&
- "Copy destination should already be assigned");
- }
- allocVirtReg(MI, *LRI, Hint, false);
- if (LRI->Error) {
- const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
- ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
- setPhysReg(MI, MO, *AllocationOrder.begin());
- return;
+ MachineOperand &MO = MI.getOperand(OpNum);
+ if (!LRI->PhysReg) {
+ allocVirtReg(MI, *LRI, Hint);
+ reload(MI, VirtReg, LRI->PhysReg);
+ } else if (LRI->Dirty) {
+ if (isLastUseOfLocalReg(MO)) {
+ LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n');
+ if (MO.isUse())
+ MO.setIsKill();
+ else
+ MO.setIsDead();
+ } else if (MO.isKill()) {
+ LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n');
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n');
+ MO.setIsDead(false);
}
+ } else if (MO.isKill()) {
+ // We must remove kill flags from uses of reloaded registers because the
+ // register would be killed immediately, and there might be a second use:
+ // %foo = OR killed %x, %x
+ // This would cause a second reload of %x into a different register.
+ LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n');
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n');
+ MO.setIsDead(false);
}
-
+ assert(LRI->PhysReg && "Register not assigned");
LRI->LastUse = &MI;
+ LRI->LastOpNum = OpNum;
markRegUsedInInstr(LRI->PhysReg);
- setPhysReg(MI, MO, LRI->PhysReg);
+ return *LRI;
}
/// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This
/// may invalidate any operand pointers. Return true if the operand kills its
/// register.
-void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
+bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
MCPhysReg PhysReg) {
+ bool Dead = MO.isDead();
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
MO.setIsRenamable(true);
- return;
+ return MO.isKill() || Dead;
}
// Handle subregister index.
MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : Register());
MO.setIsRenamable(true);
- // Note: We leave the subreg number around a little longer in case of defs.
- // This is so that the register freeing logic in allocateInstruction can still
- // recognize this as subregister defs. The code there will clear the number.
- if (!MO.isDef())
- MO.setSubReg(0);
+ MO.setSubReg(0);
// A kill flag implies killing the full register. Add corresponding super
// register kill.
if (MO.isKill()) {
MI.addRegisterKilled(PhysReg, TRI, true);
- return;
+ return true;
}
// A <def,read-undef> of a sub-register requires an implicit def of the full
// register.
- if (MO.isDef() && MO.isUndef()) {
- if (MO.isDead())
- MI.addRegisterDead(PhysReg, TRI, true);
- else
- MI.addRegisterDefined(PhysReg, TRI);
+ if (MO.isDef() && MO.isUndef())
+ MI.addRegisterDefined(PhysReg, TRI);
+
+ return Dead;
+}
+
+// Handles special instruction operand like early clobbers and tied ops when
+// there are additional physreg defines.
+void RegAllocFast::handleThroughOperands(MachineInstr &MI,
+ SmallVectorImpl<Register> &VirtDead) {
+ LLVM_DEBUG(dbgs() << "Scanning for through registers:");
+ SmallSet<Register, 8> ThroughRegs;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+ if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
+ (MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
+ if (ThroughRegs.insert(Reg).second)
+ LLVM_DEBUG(dbgs() << ' ' << printReg(Reg));
+ }
+ }
+
+ // If any physreg defines collide with preallocated through registers,
+ // we must spill and reallocate.
+ LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef()) continue;
+ Register Reg = MO.getReg();
+ if (!Reg || !Reg.isPhysical())
+ continue;
+ markRegUsedInInstr(Reg);
+
+ for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) {
+ if (!ThroughRegs.count(RegUnitStates[*UI]))
+ continue;
+
+ // Need to spill any aliasing registers.
+ for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
+ for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) {
+ definePhysReg(MI, *SI, regFree);
+ }
+ }
+ }
}
+
+ SmallVector<Register, 8> PartialDefs;
+ LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
+ continue;
+ if (MO.isUse()) {
+ if (!MO.isTied()) continue;
+ LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
+ << ") is tied to operand " << MI.findTiedOperandIdx(I)
+ << ".\n");
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
+ MCPhysReg PhysReg = LR.PhysReg;
+ setPhysReg(MI, MO, PhysReg);
+ // Note: we don't update the def operand yet. That would cause the normal
+ // def-scan to attempt spilling.
+ } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) {
+ LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n');
+ // Reload the register, but don't assign to the operand just yet.
+ // That would confuse the later phys-def processing pass.
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
+ PartialDefs.push_back(LR.PhysReg);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n");
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
+ continue;
+ if (!MO.isEarlyClobber())
+ continue;
+ // Note: defineVirtReg may invalidate MO.
+ MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0);
+ if (setPhysReg(MI, MI.getOperand(I), PhysReg))
+ VirtDead.push_back(Reg);
+ }
+
+ // Restore UsedInInstr to a state usable for allocating normal virtual uses.
+ UsedInInstr.clear();
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
+ Register Reg = MO.getReg();
+ if (!Reg || !Reg.isPhysical())
+ continue;
+ LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
+ << " as used in instr\n");
+ markRegUsedInInstr(Reg);
+ }
+
+ // Also mark PartialDefs as used to avoid reallocation.
+ for (Register PartialDef : PartialDefs)
+ markRegUsedInInstr(PartialDef);
}
#ifndef NDEBUG
@@ -968,21 +955,15 @@ void RegAllocFast::dumpState() const {
switch (unsigned VirtReg = RegUnitStates[Unit]) {
case regFree:
break;
- case regPreAssigned:
+ case regReserved:
dbgs() << " " << printRegUnit(Unit, TRI) << "[P]";
break;
- case regLiveIn:
- llvm_unreachable("Should not have regLiveIn in map");
default: {
dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg);
LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry");
- if (I->LiveOut || I->Reloaded) {
- dbgs() << '[';
- if (I->LiveOut) dbgs() << 'O';
- if (I->Reloaded) dbgs() << 'R';
- dbgs() << ']';
- }
+ if (I->Dirty)
+ dbgs() << "[D]";
assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present");
break;
}
@@ -1005,277 +986,111 @@ void RegAllocFast::dumpState() const {
}
#endif
-/// Count number of defs consumed from each register class by \p Reg
-void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
- Register Reg) const {
- assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
-
- if (Reg.isVirtual()) {
- const TargetRegisterClass *OpRC = MRI->getRegClass(Reg);
- for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
- RCIdx != RCIdxEnd; ++RCIdx) {
- const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx);
- // FIXME: Consider aliasing sub/super registers.
- if (OpRC->hasSubClassEq(IdxRC))
- ++RegClassDefCounts[RCIdx];
- }
-
- return;
- }
-
- for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
- RCIdx != RCIdxEnd; ++RCIdx) {
- const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx);
- for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
- if (IdxRC->contains(*Alias)) {
- ++RegClassDefCounts[RCIdx];
- break;
- }
- }
- }
-}
-
void RegAllocFast::allocateInstruction(MachineInstr &MI) {
- // The basic algorithm here is:
- // 1. Mark registers of def operands as free
- // 2. Allocate registers to use operands and place reload instructions for
- // registers displaced by the allocation.
- //
- // However we need to handle some corner cases:
- // - pre-assigned defs and uses need to be handled before the other def/use
- // operands are processed to avoid the allocation heuristics clashing with
- // the pre-assignment.
- // - The "free def operands" step has to come last instead of first for tied
- // operands and early-clobbers.
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // If this is a copy, we may be able to coalesce.
+ Register CopySrcReg;
+ Register CopyDstReg;
+ unsigned CopySrcSub = 0;
+ unsigned CopyDstSub = 0;
+ if (MI.isCopy()) {
+ CopyDstReg = MI.getOperand(0).getReg();
+ CopySrcReg = MI.getOperand(1).getReg();
+ CopyDstSub = MI.getOperand(0).getSubReg();
+ CopySrcSub = MI.getOperand(1).getSubReg();
+ }
+ // Track registers used by instruction.
UsedInInstr.clear();
- // Scan for special cases; Apply pre-assigned register defs to state.
- bool HasPhysRegUse = false;
- bool HasRegMask = false;
- bool HasVRegDef = false;
- bool HasDef = false;
- bool HasEarlyClobber = false;
- bool NeedToAssignLiveThroughs = false;
- for (MachineOperand &MO : MI.operands()) {
- if (MO.isReg()) {
- Register Reg = MO.getReg();
- if (Reg.isVirtual()) {
- if (MO.isDef()) {
- HasDef = true;
- HasVRegDef = true;
- if (MO.isEarlyClobber()) {
- HasEarlyClobber = true;
- NeedToAssignLiveThroughs = true;
- }
- if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef()))
- NeedToAssignLiveThroughs = true;
- }
- } else if (Reg.isPhysical()) {
- if (!MRI->isReserved(Reg)) {
- if (MO.isDef()) {
- HasDef = true;
- bool displacedAny = definePhysReg(MI, Reg);
- if (MO.isEarlyClobber())
- HasEarlyClobber = true;
- if (!displacedAny)
- MO.setIsDead(true);
- }
- if (MO.readsReg())
- HasPhysRegUse = true;
- }
- }
- } else if (MO.isRegMask()) {
- HasRegMask = true;
+ // First scan.
+ // Mark physreg uses and early clobbers as used.
+ // Find the end of the virtreg operands
+ unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask()) {
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+ continue;
}
- }
-
- // Allocate virtreg defs.
- if (HasDef) {
- if (HasVRegDef) {
- // Special handling for early clobbers, tied operands or subregister defs:
- // Compared to "normal" defs these:
- // - Must not use a register that is pre-assigned for a use operand.
- // - In order to solve tricky inline assembly constraints we change the
- // heuristic to figure out a good operand order before doing
- // assignments.
- if (NeedToAssignLiveThroughs) {
- DefOperandIndexes.clear();
- PhysRegUses.clear();
-
- // Track number of defs which may consume a register from the class.
- std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
- assert(RegClassDefCounts[0] == 0);
-
- LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
- for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg())
- continue;
- Register Reg = MO.getReg();
- if (MO.readsReg()) {
- if (Reg.isPhysical()) {
- LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI)
- << '\n');
- markPhysRegUsedInInstr(Reg);
- }
- }
-
- if (MO.isDef()) {
- if (Reg.isVirtual())
- DefOperandIndexes.push_back(I);
-
- addRegClassDefCounts(RegClassDefCounts, Reg);
- }
- }
-
- llvm::sort(DefOperandIndexes.begin(), DefOperandIndexes.end(),
- [&](uint16_t I0, uint16_t I1) {
- const MachineOperand &MO0 = MI.getOperand(I0);
- const MachineOperand &MO1 = MI.getOperand(I1);
- Register Reg0 = MO0.getReg();
- Register Reg1 = MO1.getReg();
- const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0);
- const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1);
-
- // Identify regclass that are easy to use up completely just in this
- // instruction.
- unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size();
- unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size();
-
- bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()];
- bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()];
- if (SmallClass0 > SmallClass1)
- return true;
- if (SmallClass0 < SmallClass1)
- return false;
-
- // Allocate early clobbers and livethrough operands first.
- bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() ||
- (MO0.getSubReg() == 0 && !MO0.isUndef());
- bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() ||
- (MO1.getSubReg() == 0 && !MO1.isUndef());
- if (Livethrough0 > Livethrough1)
- return true;
- if (Livethrough0 < Livethrough1)
- return false;
-
- // Tie-break rule: operand index.
- return I0 < I1;
- });
-
- for (uint16_t OpIdx : DefOperandIndexes) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
- unsigned Reg = MO.getReg();
- if (MO.isEarlyClobber() || MO.isTied() ||
- (MO.getSubReg() && !MO.isUndef())) {
- defineLiveThroughVirtReg(MI, OpIdx, Reg);
- } else {
- defineVirtReg(MI, OpIdx, Reg);
- }
- }
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (!Reg) continue;
+ if (Register::isVirtualRegister(Reg)) {
+ VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
} else {
- // Assign virtual register defs.
- for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
- MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef())
- continue;
- Register Reg = MO.getReg();
- if (Reg.isVirtual())
- defineVirtReg(MI, I, Reg);
- }
- }
- }
-
- // Free registers occupied by defs.
- // Iterate operands in reverse order, so we see the implicit super register
- // defs first (we added them earlier in case of <def,read-undef>).
- for (unsigned I = MI.getNumOperands(); I-- > 0;) {
- MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef())
- continue;
-
- // subreg defs don't free the full register. We left the subreg number
- // around as a marker in setPhysReg() to recognize this case here.
- if (MO.getSubReg() != 0) {
- MO.setSubReg(0);
- continue;
- }
-
- // Do not free tied operands and early clobbers.
- if (MO.isTied() || MO.isEarlyClobber())
- continue;
- Register Reg = MO.getReg();
- if (!Reg)
- continue;
- assert(Reg.isPhysical());
- if (MRI->isReserved(Reg))
- continue;
- freePhysReg(Reg);
- unmarkRegUsedInInstr(Reg);
- }
- }
-
- // Displace clobbered registers.
- if (HasRegMask) {
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isRegMask()) {
- // MRI bookkeeping.
- MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
-
- // Displace clobbered registers.
- const uint32_t *Mask = MO.getRegMask();
- for (LiveRegMap::iterator LRI = LiveVirtRegs.begin(),
- LRIE = LiveVirtRegs.end(); LRI != LRIE; ++LRI) {
- MCPhysReg PhysReg = LRI->PhysReg;
- if (PhysReg != 0 && MachineOperand::clobbersPhysReg(Mask, PhysReg))
- displacePhysReg(MI, PhysReg);
- }
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
}
+ continue;
}
+ if (!MRI->isAllocatable(Reg)) continue;
+ if (MO.isUse()) {
+ usePhysReg(MO);
+ } else if (MO.isEarlyClobber()) {
+ definePhysReg(MI, Reg,
+ (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
}
- // Apply pre-assigned register uses to state.
- if (HasPhysRegUse) {
- for (MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.readsReg())
- continue;
- Register Reg = MO.getReg();
- if (!Reg.isPhysical())
- continue;
- if (MRI->isReserved(Reg))
- continue;
- bool displacedAny = usePhysReg(MI, Reg);
- if (!displacedAny && !MRI->isReserved(Reg))
- MO.setIsKill(true);
- }
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDstReg = Register();
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
}
- // Allocate virtreg uses and insert reloads as necessary.
+ // Second scan.
+ // Allocate virtreg uses.
bool HasUndefUse = false;
- for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ for (unsigned I = 0; I != VirtOpEnd; ++I) {
MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isUse())
- continue;
+ if (!MO.isReg()) continue;
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
+ if (MO.isUse()) {
+ if (MO.isUndef()) {
+ HasUndefUse = true;
+ // There is no need to allocate a register for an undef use.
+ continue;
+ }
- if (MO.isUndef()) {
- HasUndefUse = true;
- continue;
- }
-
-
- // Populate MayLiveAcrossBlocks in case the use block is allocated before
- // the def block (removing the vreg uses).
- mayLiveIn(Reg);
-
+ // Populate MayLiveAcrossBlocks in case the use block is allocated before
+ // the def block (removing the vreg uses).
+ mayLiveIn(Reg);
- assert(!MO.isInternalRead() && "Bundles not supported");
- assert(MO.readsReg() && "reading use");
- useVirtReg(MI, I, Reg);
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
+ MCPhysReg PhysReg = LR.PhysReg;
+ CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, MO, PhysReg))
+ killVirtReg(LR);
+ }
}
// Allocate undef operands. This is a separate step because in a situation
@@ -1294,40 +1109,76 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
}
- // Free early clobbers.
- if (HasEarlyClobber) {
- for (unsigned I = MI.getNumOperands(); I-- > 0; ) {
- MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
- continue;
- // subreg defs don't free the full register. We left the subreg number
- // around as a marker in setPhysReg() to recognize this case here.
- if (MO.getSubReg() != 0) {
- MO.setSubReg(0);
- continue;
- }
-
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
+ UsedInInstr.clear();
+ if (hasEarlyClobbers) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg()) continue;
Register Reg = MO.getReg();
- if (!Reg)
- continue;
- assert(Reg.isPhysical() && "should have register assigned");
-
- // We sometimes get odd situations like:
- // early-clobber %x0 = INSTRUCTION %x0
- // which is semantically questionable as the early-clobber should
- // apply before the use. But in practice we consider the use to
- // happen before the early clobber now. Don't free the early clobber
- // register in this case.
- if (MI.readsRegister(Reg, TRI))
+ if (!Reg || !Reg.isPhysical())
continue;
-
- freePhysReg(Reg);
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MO.isTied()) continue;
+ markRegUsedInInstr(Reg);
}
}
+ unsigned DefOpEnd = MI.getNumOperands();
+ if (MI.isCall()) {
+ // Spill all virtregs before a call. This serves one purpose: If an
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots.
+ // Note: although this is appealing to just consider all definitions
+ // as call-clobbered, this is not correct because some of those
+ // definitions may be used later on and we do not want to reuse
+ // those for virtual registers in between.
+ LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI, /*OnlyLiveOut*/ false);
+ }
+
+ // Third scan.
+ // Mark all physreg defs as used before allocating virtreg defs.
+ for (unsigned I = 0; I != DefOpEnd; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ Register Reg = MO.getReg();
+
+ if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg))
+ continue;
+ definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+ }
+
+ // Fourth scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned I = 0; I != DefOpEnd; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ Register Reg = MO.getReg();
+
+ // We have already dealt with phys regs in the previous scan.
+ if (Reg.isPhysical())
+ continue;
+ MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
+ if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
+ VirtDead.push_back(Reg);
+ CopyDstReg = Register(); // cancel coalescing;
+ } else
+ CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
+ }
+
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+ // because we are creating our own kill flags, and they are always at the last
+ // use.
+ for (Register VirtReg : VirtDead)
+ killVirtReg(VirtReg);
+ VirtDead.clear();
+
LLVM_DEBUG(dbgs() << "<< " << MI);
- if (MI.isCopy() && MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
- MI.getNumOperands() == 2) {
+ if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n");
Coalesced.push_back(&MI);
}
@@ -1344,22 +1195,23 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
if (!Register::isVirtualRegister(Reg))
return;
- // Already spilled to a stackslot?
- int SS = StackSlotForVirtReg[Reg];
- if (SS != -1) {
- // Modify DBG_VALUE now that the value is in a spill slot.
- updateDbgValueForSpill(MI, SS);
- LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI);
- return;
- }
-
// See if this virtual register has already been allocated to a physical
// register or spilled to a stack slot.
LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
setPhysReg(MI, MO, LRI->PhysReg);
} else {
- DanglingDbgValues[Reg].push_back(&MI);
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS != -1) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ updateDbgValueForSpill(MI, SS);
+ LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI);
+ return;
+ }
+
+ // We can't allocate a physreg for a DebugValue, sorry!
+ LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(Register());
}
// If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
@@ -1367,17 +1219,6 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
LiveDbgValueMap[Reg].push_back(&MI);
}
-#ifndef NDEBUG
-bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const {
- for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) {
- if (RegUnitStates[*UI] != LR.VirtReg)
- return false;
- }
-
- return true;
-}
-#endif
-
void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
@@ -1385,15 +1226,18 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
- for (MachineBasicBlock *Succ : MBB.successors()) {
- for (const MachineBasicBlock::RegisterMaskPair &LI : Succ->liveins())
- setPhysRegState(LI.PhysReg, regPreAssigned);
- }
+ MachineBasicBlock::iterator MII = MBB.begin();
+ // Add live-in registers as live.
+ for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins())
+ if (MRI->isAllocatable(LI.PhysReg))
+ definePhysReg(MII, LI.PhysReg, regReserved);
+
+ VirtDead.clear();
Coalesced.clear();
- // Traverse block in reverse order allocating instructions one by one.
- for (MachineInstr &MI : reverse(MBB)) {
+ // Otherwise, sequentially allocate each instruction in the MBB.
+ for (MachineInstr &MI : MBB) {
LLVM_DEBUG(
dbgs() << "\n>> " << MI << "Regs:";
dumpState()
@@ -1409,14 +1253,9 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
allocateInstruction(MI);
}
- LLVM_DEBUG(
- dbgs() << "Begin Regs:";
- dumpState()
- );
-
// Spill all physical registers holding virtual registers now.
- LLVM_DEBUG(dbgs() << "Loading live registers at begin of block.\n");
- reloadAtBegin(MBB);
+ LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+ spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true);
// Erase all the coalesced copies. We are delaying it until now because
// LiveVirtRegs might refer to the instrs.
@@ -1424,20 +1263,6 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
MBB.erase(MI);
NumCoalesced += Coalesced.size();
- for (auto &UDBGPair : DanglingDbgValues) {
- for (MachineInstr *DbgValue : UDBGPair.second) {
- assert(DbgValue->isDebugValue() && "expected DBG_VALUE");
- MachineOperand &MO = DbgValue->getOperand(0);
- // Nothing to do if the vreg was spilled in the meantime.
- if (!MO.isReg())
- continue;
- LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
- << '\n');
- MO.setReg(0);
- }
- }
- DanglingDbgValues.clear();
-
LLVM_DEBUG(MBB.dump());
}
@@ -1451,11 +1276,8 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
MFI = &MF.getFrameInfo();
MRI->freezeReservedRegs(MF);
RegClassInfo.runOnMachineFunction(MF);
- unsigned NumRegUnits = TRI->getNumRegUnits();
UsedInInstr.clear();
- UsedInInstr.setUniverse(NumRegUnits);
- PhysRegUses.clear();
- PhysRegUses.setUniverse(NumRegUnits);
+ UsedInInstr.setUniverse(TRI->getNumRegUnits());
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
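
The LLVM_DEBUG statements restored above print a per-instruction allocation trace. To watch that trace while reproducing a failure, an assertions-enabled llc can be run with the fast allocator forced on; a minimal sketch, assuming the pass's DEBUG_TYPE is "regalloc" and using a placeholder input file:

  llc -O0 -regalloc=fast -debug-only=regalloc repro.ll -o /dev/null
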
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll b/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll
index 296795b32761..cbeac5d85fc4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll
@@ -31,8 +31,9 @@ target triple = "arm64-apple-ios13.0.0"
; This test checks that we don't re-use the register for the variable descriptor
; for the second ldr.
; CHECK: adrp x[[PTR1:[0-9]+]], _t_val@TLVPPAGE
-; CHECK: ldr x0, [x[[PTR1]], _t_val@TLVPPAGEOFF]
-; CHECK: ldr x[[FPTR:[0-9]+]], [x0]
+; CHECK: ldr x[[PTR1]], [x[[PTR1]], _t_val@TLVPPAGEOFF]
+; CHECK: ldr x[[FPTR:[0-9]+]], [x[[PTR1]]]
+; CHECK: mov x0, x[[PTR1]]
; CHECK: blr x[[FPTR]]
define void @_Z4funcPKc(i8* %id) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
index d563ccb851ce..805ba09bace2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
@@ -94,7 +94,7 @@ entry:
store i32 %c, i32* %c.addr, align 4
store i64 %d, i64* %d.addr, align 8
%0 = load i16, i16* %b.addr, align 2
-; CHECK: tbz {{w[0-9]+}}, #0, LBB4_2
+; CHECK: tbz w8, #0, LBB4_2
%conv = trunc i16 %0 to i1
br i1 %conv, label %if.then, label %if.end
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
index 586b7d116f5c..6b5799bdefd9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
@@ -79,7 +79,8 @@ declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8
define i32 @t2() {
entry:
; CHECK-LABEL: t2
-; CHECK: mov x0, xzr
+; CHECK: mov [[REG1:x[0-9]+]], xzr
+; CHECK: mov x0, [[REG1]]
; CHECK: mov w1, #-8
; CHECK: mov [[REG2:w[0-9]+]], #1023
; CHECK: uxth w2, [[REG2]]
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
index b3c073f53542..7c546936ba27 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
@@ -4,8 +4,9 @@
define i32 @fptosi_wh(half %a) nounwind ssp {
entry:
; CHECK-LABEL: fptosi_wh
-; CHECK: fcvt [[REG:s[0-9]+]], h0
-; CHECK: fcvtzs w0, [[REG]]
+; CHECK: fcvt s0, h0
+; CHECK: fcvtzs [[REG:w[0-9]+]], s0
+; CHECK: mov w0, [[REG]]
%conv = fptosi half %a to i32
ret i32 %conv
}
@@ -14,8 +15,9 @@ entry:
define i32 @fptoui_swh(half %a) nounwind ssp {
entry:
; CHECK-LABEL: fptoui_swh
-; CHECK: fcvt [[REG:s[0-9]+]], h0
-; CHECK: fcvtzu w0, [[REG]]
+; CHECK: fcvt s0, h0
+; CHECK: fcvtzu [[REG:w[0-9]+]], s0
+; CHECK: mov w0, [[REG]]
%conv = fptoui half %a to i32
ret i32 %conv
}
@@ -24,8 +26,8 @@ entry:
define half @sitofp_hw_i1(i1 %a) nounwind ssp {
entry:
; CHECK-LABEL: sitofp_hw_i1
-; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
-; CHECK: scvtf s0, [[REG]]
+; CHECK: sbfx w8, w0, #0, #1
+; CHECK: scvtf s0, w8
; CHECK: fcvt h0, s0
%conv = sitofp i1 %a to half
ret half %conv
@@ -35,8 +37,8 @@ entry:
define half @sitofp_hw_i8(i8 %a) nounwind ssp {
entry:
; CHECK-LABEL: sitofp_hw_i8
-; CHECK: sxtb [[REG:w[0-9]+]], w0
-; CHECK: scvtf s0, [[REG]]
+; CHECK: sxtb w8, w0
+; CHECK: scvtf s0, w8
; CHECK: fcvt h0, s0
%conv = sitofp i8 %a to half
ret half %conv
@@ -46,8 +48,8 @@ entry:
define half @sitofp_hw_i16(i16 %a) nounwind ssp {
entry:
; CHECK-LABEL: sitofp_hw_i16
-; CHECK: sxth [[REG:w[0-9]+]], w0
-; CHECK: scvtf s0, [[REG]]
+; CHECK: sxth w8, w0
+; CHECK: scvtf s0, w8
; CHECK: fcvt h0, s0
%conv = sitofp i16 %a to half
ret half %conv
@@ -77,8 +79,8 @@ entry:
define half @uitofp_hw_i1(i1 %a) nounwind ssp {
entry:
; CHECK-LABEL: uitofp_hw_i1
-; CHECK: and [[REG:w[0-9]+]], w0, #0x1
-; CHECK: ucvtf s0, [[REG]]
+; CHECK: and w8, w0, #0x1
+; CHECK: ucvtf s0, w8
; CHECK: fcvt h0, s0
%conv = uitofp i1 %a to half
ret half %conv
@@ -88,8 +90,8 @@ entry:
define half @uitofp_hw_i8(i8 %a) nounwind ssp {
entry:
; CHECK-LABEL: uitofp_hw_i8
-; CHECK: and [[REG:w[0-9]+]], w0, #0xff
-; CHECK: ucvtf s0, [[REG]]
+; CHECK: and w8, w0, #0xff
+; CHECK: ucvtf s0, w8
; CHECK: fcvt h0, s0
%conv = uitofp i8 %a to half
ret half %conv
@@ -99,8 +101,8 @@ entry:
define half @uitofp_hw_i16(i16 %a) nounwind ssp {
entry:
; CHECK-LABEL: uitofp_hw_i16
-; CHECK: and [[REG:w[0-9]+]], w0, #0xffff
-; CHECK: ucvtf s0, [[REG]]
+; CHECK: and w8, w0, #0xffff
+; CHECK: ucvtf s0, w8
; CHECK: fcvt h0, s0
%conv = uitofp i16 %a to half
ret half %conv
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
index 26ce3a3b94aa..d8abf14c1366 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck -enable-var-scope %s
+; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s
;; Test various conversions.
define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp {
@@ -49,12 +49,13 @@ entry:
; CHECK: strh w1, [sp, #12]
; CHECK: str w2, [sp, #8]
; CHECK: str x3, [sp]
-; CHECK: ldrb [[REG0:w[0-9]+]], [sp, #15]
-; CHECK: strh [[REG0]], [sp, #12]
-; CHECK: ldrh [[REG1:w[0-9]+]], [sp, #12]
-; CHECK: str [[REG1]], [sp, #8]
-; CHECK: ldr w[[REG2:[0-9]+]], [sp, #8]
-; CHECK: str x[[REG2]], [sp]
+; CHECK: ldrb w8, [sp, #15]
+; CHECK: strh w8, [sp, #12]
+; CHECK: ldrh w8, [sp, #12]
+; CHECK: str w8, [sp, #8]
+; CHECK: ldr w8, [sp, #8]
+; CHECK: ; kill: def $x8 killed $w8
+; CHECK: str x8, [sp]
; CHECK: ldr x0, [sp]
; CHECK: ret
%a.addr = alloca i8, align 1
@@ -104,12 +105,12 @@ entry:
; CHECK: strh w1, [sp, #12]
; CHECK: str w2, [sp, #8]
; CHECK: str x3, [sp]
-; CHECK: ldrsb [[REG0:w[0-9]+]], [sp, #15]
-; CHECK: strh [[REG0]], [sp, #12]
-; CHECK: ldrsh [[REG1:w[0-9]+]], [sp, #12]
-; CHECK: str [[REG1]], [sp, #8]
-; CHECK: ldrsw [[REG2:x[0-9]+]], [sp, #8]
-; CHECK: str [[REG2]], [sp]
+; CHECK: ldrsb w8, [sp, #15]
+; CHECK: strh w8, [sp, #12]
+; CHECK: ldrsh w8, [sp, #12]
+; CHECK: str w8, [sp, #8]
+; CHECK: ldrsw x8, [sp, #8]
+; CHECK: str x8, [sp]
; CHECK: ldr x0, [sp]
; CHECK: ret
%a.addr = alloca i8, align 1
@@ -165,8 +166,8 @@ entry:
define signext i16 @sext_i1_i16(i1 %a) nounwind ssp {
entry:
; CHECK-LABEL: sext_i1_i16
-; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
-; CHECK: sxth w0, [[REG]]
+; CHECK: sbfx w8, w0, #0, #1
+; CHECK-NEXT: sxth w0, w8
%conv = sext i1 %a to i16
ret i16 %conv
}
@@ -175,8 +176,8 @@ entry:
define signext i8 @sext_i1_i8(i1 %a) nounwind ssp {
entry:
; CHECK-LABEL: sext_i1_i8
-; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
-; CHECK: sxtb w0, [[REG]]
+; CHECK: sbfx w8, w0, #0, #1
+; CHECK-NEXT: sxtb w0, w8
%conv = sext i1 %a to i8
ret i8 %conv
}
@@ -239,8 +240,8 @@ entry:
define float @sitofp_sw_i1(i1 %a) nounwind ssp {
entry:
; CHECK-LABEL: sitofp_sw_i1
-; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
-; CHECK: scvtf s0, [[REG]]
+; CHECK: sbfx w8, w0, #0, #1
+; CHECK: scvtf s0, w8
%conv = sitofp i1 %a to float
ret float %conv
}
@@ -249,8 +250,8 @@ entry:
define float @sitofp_sw_i8(i8 %a) nounwind ssp {
entry:
; CHECK-LABEL: sitofp_sw_i8
-; CHECK: sxtb [[REG:w[0-9]+]], w0
-; CHECK: scvtf s0, [[REG]]
+; CHECK: sxtb w8, w0
+; CHECK: scvtf s0, w8
%conv = sitofp i8 %a to float
ret float %conv
}
@@ -303,8 +304,8 @@ entry:
define float @uitofp_sw_i1(i1 %a) nounwind ssp {
entry:
; CHECK-LABEL: uitofp_sw_i1
-; CHECK: and [[REG:w[0-9]+]], w0, #0x1
-; CHECK: ucvtf s0, [[REG]]
+; CHECK: and w8, w0, #0x1
+; CHECK: ucvtf s0, w8
%conv = uitofp i1 %a to float
ret float %conv
}
@@ -373,8 +374,7 @@ entry:
define zeroext i16 @i64_trunc_i16(i64 %a) nounwind ssp {
entry:
; CHECK-LABEL: i64_trunc_i16
-; CHECK: mov x[[TMP:[0-9]+]], x0
-; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0xffff{{$}}
+; CHECK: and [[REG2:w[0-9]+]], w0, #0xffff
; CHECK: uxth w0, [[REG2]]
%conv = trunc i64 %a to i16
ret i16 %conv
@@ -383,8 +383,7 @@ entry:
define zeroext i8 @i64_trunc_i8(i64 %a) nounwind ssp {
entry:
; CHECK-LABEL: i64_trunc_i8
-; CHECK: mov x[[TMP:[0-9]+]], x0
-; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0xff{{$}}
+; CHECK: and [[REG2:w[0-9]+]], w0, #0xff
; CHECK: uxtb w0, [[REG2]]
%conv = trunc i64 %a to i8
ret i8 %conv
@@ -393,8 +392,7 @@ entry:
define zeroext i1 @i64_trunc_i1(i64 %a) nounwind ssp {
entry:
; CHECK-LABEL: i64_trunc_i1
-; CHECK: mov x[[TMP:[0-9]+]], x0
-; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0x1{{$}}
+; CHECK: and [[REG2:w[0-9]+]], w0, #0x1
; CHECK: and w0, [[REG2]], #0x1
%conv = trunc i64 %a to i1
ret i1 %conv
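
The updated expectations above now name concrete registers (w8) instead of FileCheck captures, so they are tied more tightly to the allocator's register choices. They can be replayed outside of lit with the test's own RUN line; a sketch, assuming llc and FileCheck from the same build are on PATH and the command is run from the top of the llvm-project checkout:

  llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs \
      -mtriple=arm64-apple-darwin -mcpu=cyclone \
      < llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll | \
    FileCheck llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
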
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index 8d35af2737b4..e1e889b906c0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -210,10 +210,10 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) noun
;
; FAST-LABEL: test_vcvt_high_f32_f64:
; FAST: // %bb.0:
+; FAST-NEXT: // implicit-def: $q2
; FAST-NEXT: mov.16b v2, v0
-; FAST-NEXT: // implicit-def: $q0
+; FAST-NEXT: fcvtn2 v2.4s, v1.2d
; FAST-NEXT: mov.16b v0, v2
-; FAST-NEXT: fcvtn2 v0.4s, v1.2d
; FAST-NEXT: ret
;
; GISEL-LABEL: test_vcvt_high_f32_f64:
@@ -249,10 +249,10 @@ define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nou
;
; FAST-LABEL: test_vcvtx_high_f32_f64:
; FAST: // %bb.0:
+; FAST-NEXT: // implicit-def: $q2
; FAST-NEXT: mov.16b v2, v0
-; FAST-NEXT: // implicit-def: $q0
+; FAST-NEXT: fcvtxn2 v2.4s, v1.2d
; FAST-NEXT: mov.16b v0, v2
-; FAST-NEXT: fcvtxn2 v0.4s, v1.2d
; FAST-NEXT: ret
;
; GISEL-LABEL: test_vcvtx_high_f32_f64:
@@ -283,12 +283,17 @@ define i16 @to_half(float %in) {
;
; FAST-LABEL: to_half:
; FAST: // %bb.0:
-; FAST-NEXT: fcvt h1, s0
+; FAST-NEXT: sub sp, sp, #16 // =16
+; FAST-NEXT: .cfi_def_cfa_offset 16
+; FAST-NEXT: fcvt h0, s0
; FAST-NEXT: // implicit-def: $w0
-; FAST-NEXT: fmov s0, w0
-; FAST-NEXT: mov.16b v0, v1
-; FAST-NEXT: fmov w0, s0
-; FAST-NEXT: // kill: def $w1 killed $w0
+; FAST-NEXT: fmov s1, w0
+; FAST-NEXT: mov.16b v1, v0
+; FAST-NEXT: fmov w8, s1
+; FAST-NEXT: mov w0, w8
+; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill
+; FAST-NEXT: mov w0, w8
+; FAST-NEXT: add sp, sp, #16 // =16
; FAST-NEXT: ret
;
; GISEL-LABEL: to_half:
diff --git a/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll b/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
index 3c71ee1ee58c..0467a2cba831 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
@@ -17,9 +17,8 @@ declare [2 x i32] @callee()
define void @test_struct_return(i32* %addr) {
; CHECK-LABEL: test_struct_return:
; CHECK: bl _callee
-; CHECK: x[[COPYX0:[0-9]+]], x0
-; CHECK-DAG: lsr [[HI:x[0-9]+]], x[[COPYX0]], #32
-; CHECK-DAG: str w[[COPYX0]]
+; CHECK-DAG: lsr [[HI:x[0-9]+]], x0, #32
+; CHECK-DAG: str w0
%res = call [2 x i32] @callee()
%res.0 = extractvalue [2 x i32] %res, 0
store i32 %res.0, i32* %addr
diff --git a/llvm/test/CodeGen/AArch64/arm64_32-null.ll b/llvm/test/CodeGen/AArch64/arm64_32-null.ll
index 6360b6298160..9d62c56248b5 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32-null.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-null.ll
@@ -13,12 +13,11 @@ define void @test_store(i8** %p) {
define void @test_phi(i8** %p) {
; CHECK-LABEL: test_phi:
; CHECK: mov [[R1:x[0-9]+]], xzr
-; CHECK: str [[R1]], [sp, #8]
+; CHECK: str [[R1]], [sp]
; CHECK: b [[BB:LBB[0-9_]+]]
; CHECK: [[BB]]:
-; CHECK: ldr x0, [sp, #8]
-; CHECK: mov w8, w0
-; CHECK: str w8, [x{{.*}}]
+; CHECK: ldr x0, [sp]
+; CHECK: str w0, [x{{.*}}]
bb0:
br label %bb1
diff --git a/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll b/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
index 9edf9e6d82df..46532386783f 100644
--- a/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
+++ b/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
@@ -64,9 +64,9 @@ bb3:
; OPT: b.gt [[L:\.LBB[0-9_]+]]
; OPT: tbz w1, #0, [[L]]
;
-; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]]
; NOOPT: subs w{{[0-9]+}}, w{{[0-9]+}}, #0
; NOOPT: cset [[R1:w[0-9]+]], gt
+; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]]
; NOOPT: str [[R1]], [sp, #[[SLOT1:[0-9]+]]]
; NOOPT: b .LBB
; NOOPT: ldr [[R2:w[0-9]+]], [sp, #[[SLOT1]]]
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
index 43e36dd88209..bfb7b5809f21 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -1,16 +1,16 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s
define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_8:
-; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxrb [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: mov [[STATUS:w[3-9]+]], #0
+; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1, uxtb
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxrb [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxrb [[STATUS]], w2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
-; CHECK: subs {{w[0-9]+}}, [[OLD]], w1, uxtb
+; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
; CHECK: cset {{w[0-9]+}}, eq
%res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
ret { i8, i1 } %res
@@ -18,12 +18,12 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_16:
-; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxrh [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: mov [[STATUS:w[3-9]+]], #0
+; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1, uxth
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxrh [[STATUS:w[3-9]]], w2, {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxrh [[STATUS:w[3-9]]], w2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
@@ -34,12 +34,12 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_32:
-; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: mov [[STATUS:w[3-9]+]], #0
+; CHECK: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxr [[STATUS]], w2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
@@ -50,12 +50,12 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind
define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_64:
-; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr [[OLD:x[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: mov [[STATUS:w[3-9]+]], #0
+; CHECK: ldaxr [[OLD:x[0-9]+]], [x0]
; CHECK: cmp [[OLD]], x1
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxr [[STATUS:w[0-9]+]], x2, {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxr [[STATUS]], x2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{x[0-9]+}}, [[OLD]], x1
@@ -66,15 +66,14 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind
define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_128:
-; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0]
; CHECK: cmp [[OLD_LO]], x2
; CHECK: cset [[CMP_TMP:w[0-9]+]], ne
; CHECK: cmp [[OLD_HI]], x3
; CHECK: cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne
; CHECK: cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
%res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst monotonic
@@ -87,18 +86,17 @@ define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nou
@var128 = global i128 0
define {i128, i1} @test_cmpxchg_128_unsplit(i128* %addr) {
; CHECK-LABEL: test_cmpxchg_128_unsplit:
-; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
; CHECK: ldp [[DESIRED_LO:x[0-9]+]], [[DESIRED_HI:x[0-9]+]], [x[[VAR128]]]
; CHECK: ldp [[NEW_LO:x[0-9]+]], [[NEW_HI:x[0-9]+]], [x[[VAR128]]]
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0]
; CHECK: cmp [[OLD_LO]], [[DESIRED_LO]]
; CHECK: cset [[CMP_TMP:w[0-9]+]], ne
; CHECK: cmp [[OLD_HI]], [[DESIRED_HI]]
; CHECK: cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne
; CHECK: cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
diff --git a/llvm/test/CodeGen/AArch64/combine-loads.ll b/llvm/test/CodeGen/AArch64/combine-loads.ll
index c94751d77982..22a71f5701f1 100644
--- a/llvm/test/CodeGen/AArch64/combine-loads.ll
+++ b/llvm/test/CodeGen/AArch64/combine-loads.ll
@@ -6,10 +6,10 @@ define <2 x i64> @z(i64* nocapture nonnull readonly %p) {
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: mov v0.d[0], x9
-; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x0, #8]
+; CHECK-NEXT: mov v0.d[0], x8
+; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: ret
%b = load i64, i64* %p
%p2 = getelementptr i64, i64* %p, i64 1
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll b/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
index 82e3c2d4d61a..f03955c4dcd3 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
@@ -1,19 +1,20 @@
; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: cmpxchg_monotonic_32:
-; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9_]+]]:
-; CHECK-NEXT: ldaxr w0, {{\[}}[[ADDR]]{{\]}}
-; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
+; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
+; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT: stlxr [[STATUS]], w2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: cset [[STATUS]], eq
+; CHECK-NEXT: cmp [[OLD]], w1
+; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq
; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
; CHECK-NEXT: str [[STATUS32]], [x3]
+; CHECK-NEXT: mov w0, [[OLD]]
define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
%tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic
%tmp1 = extractvalue { i32, i1 } %tmp0, 0
@@ -25,20 +26,21 @@ define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
; CHECK-LABEL: cmpxchg_acq_rel_32_load:
; CHECK: // %bb.0:
-; CHECK: mov [[ADDR:x[0-9]+]], x0
-; CHECK: ldr [[NEW:w[0-9]+]], [x2]
+; CHECK: ldr [[NEW:w[0-9]+]], [x2]
; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
-; CHECK-NEXT: ldaxr w0, {{\[}}[[ADDR]]{{\]}}
-; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
+; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
+; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT: stlxr [[STATUS]], [[NEW]], [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: cset [[STATUS]], eq
+; CHECK-NEXT: cmp [[OLD]], w1
+; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq
; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
; CHECK-NEXT: str [[STATUS32]], [x3]
+; CHECK-NEXT: mov w0, [[OLD]]
define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 {
%new = load i32, i32* %pnew
%tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel acquire
@@ -50,19 +52,20 @@ define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0
}
; CHECK-LABEL: cmpxchg_seq_cst_64:
-; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9_]+]]:
-; CHECK-NEXT: ldaxr x0, {{\[}}[[ADDR]]{{\]}}
-; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
+; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0]
+; CHECK-NEXT: cmp [[OLD]], x1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: stlxr [[STATUS]], x2, {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT: stlxr [[STATUS]], x2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
-; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: cmp [[OLD]], x1
; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq
; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
; CHECK-NEXT: str [[STATUS32]], [x3]
+; CHECK-NEXT: mov x0, [[OLD]]
define i64 @cmpxchg_seq_cst_64(i64* %p, i64 %cmp, i64 %new, i32* %ps) #0 {
%tmp0 = cmpxchg i64* %p, i64 %cmp, i64 %new seq_cst seq_cst
%tmp1 = extractvalue { i64, i1 } %tmp0, 0
diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index 2e5e988f0576..105969717e46 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -6,15 +6,15 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) {
; CHECK-LABEL: popcount128:
; CHECK: // %bb.0: // %Entry
; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: // implicit-def: $q0
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: mov v0.d[1], x8
-; CHECK-NEXT: cnt v0.16b, v0.16b
-; CHECK-NEXT: uaddlv h1, v0.16b
-; CHECK-NEXT: // implicit-def: $q0
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: // implicit-def: $q1
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov v1.d[1], x8
+; CHECK-NEXT: cnt v0.16b, v1.16b
+; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: // implicit-def: $q1
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: fmov w0, s1
; CHECK-NEXT: ret
Entry:
%1 = load i128, i128* %0, align 16
@@ -32,24 +32,24 @@ define i16 @popcount256(i256* nocapture nonnull readonly %0) {
; CHECK: // %bb.0: // %Entry
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x0, #24]
-; CHECK-NEXT: ldr d1, [x0, #16]
-; CHECK-NEXT: // implicit-def: $q0
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: mov v0.d[1], x9
-; CHECK-NEXT: cnt v0.16b, v0.16b
-; CHECK-NEXT: uaddlv h1, v0.16b
-; CHECK-NEXT: // implicit-def: $q0
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: // implicit-def: $q0
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: mov v0.d[1], x8
-; CHECK-NEXT: cnt v0.16b, v0.16b
-; CHECK-NEXT: uaddlv h1, v0.16b
-; CHECK-NEXT: // implicit-def: $q0
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: ldr d0, [x0, #16]
+; CHECK-NEXT: // implicit-def: $q1
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov v1.d[1], x9
+; CHECK-NEXT: cnt v0.16b, v1.16b
+; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: // implicit-def: $q1
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: // implicit-def: $q1
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov v1.d[1], x8
+; CHECK-NEXT: cnt v0.16b, v1.16b
+; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: // implicit-def: $q1
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: add w0, w8, w9
; CHECK-NEXT: ret
Entry:
@@ -69,10 +69,10 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: cnt v0.16b, v0.16b
-; CHECK-NEXT: uaddlv h1, v0.16b
-; CHECK-NEXT: // implicit-def: $q0
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: // implicit-def: $q1
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: fmov w0, s1
; CHECK-NEXT: // kill: def $x0 killed $w0
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov x1, v0.d[1]
diff --git a/llvm/test/CodeGen/AArch64/swift-return.ll b/llvm/test/CodeGen/AArch64/swift-return.ll
index 2036faf39bdd..2bf5e379b379 100644
--- a/llvm/test/CodeGen/AArch64/swift-return.ll
+++ b/llvm/test/CodeGen/AArch64/swift-return.ll
@@ -203,10 +203,10 @@ declare swiftcc { double, double, double, double, i32, i32, i32, i32 } @gen6()
; CHECK-DAG: mov w3, w0
; CHECK: ret
; CHECK-O0-LABEL: _gen7
-; CHECK-O0: mov w3, w0
-; CHECK-O0: mov w0, w3
-; CHECK-O0: mov w1, w3
-; CHECK-O0: mov w2, w3
+; CHECK-O0: str w0, [sp, #12]
+; CHECK-O0: ldr w1, [sp, #12]
+; CHECK-O0: ldr w2, [sp, #12]
+; CHECK-O0: ldr w3, [sp, #12]
define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) {
%v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0
%v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1
@@ -221,10 +221,10 @@ define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) {
; CHECK: mov w3, w0
; CHECK: ret
; CHECK-O0-LABEL: _gen9
-; CHECK-O0: mov w3, w0
-; CHECK-O0: mov w0, w3
-; CHECK-O0: mov w1, w3
-; CHECK-O0: mov w2, w3
+; CHECK-O0: str w0, [sp, #12]
+; CHECK-O0: ldr w1, [sp, #12]
+; CHECK-O0: ldr w2, [sp, #12]
+; CHECK-O0: ldr w3, [sp, #12]
define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) {
%v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0
%v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1
diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll
index e219ef770f93..a8635f682ff1 100644
--- a/llvm/test/CodeGen/AArch64/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/swifterror.ll
@@ -21,10 +21,11 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) {
; CHECK-O0-LABEL: foo:
; CHECK-O0: mov w{{.*}}, #16
; CHECK-O0: malloc
-; CHECK-O0: mov x21, x0
-; CHECK-O0-NOT: x21
+; CHECK-O0: mov x1, x0
+; CHECK-O0-NOT: x1
; CHECK-O0: mov [[ID:w[0-9]+]], #1
; CHECK-O0: strb [[ID]], [x0, #8]
+; CHECK-O0: mov x21, x1
entry:
%call = call i8* @malloc(i64 16)
%call.0 = bitcast i8* %call to %swift_error*
@@ -137,12 +138,14 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
; CHECK-O0: cbz w0
; CHECK-O0: mov w{{.*}}, #16
; CHECK-O0: malloc
-; CHECK-O0: mov x21, x0
+; CHECK-O0: mov [[ID:x[0-9]+]], x0
; CHECK-O0: mov [[ID2:w[0-9]+]], #1
; CHECK-O0: strb [[ID2]], [x0, #8]
+; CHECK-O0: mov x21, [[ID]]
; CHECK-O0: ret
; reload from stack
-; CHECK-O0: ldr x21, [sp, [[SLOT]]]
+; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT]]]
+; CHECK-O0: mov x21, [[ID3]]
; CHECK-O0: ret
entry:
%cond = icmp ne i32 %cc, 0
@@ -176,10 +179,10 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
; CHECK-O0-AARCH64-LABEL: foo_loop:
; spill x21
-; CHECK-O0-AARCH64: stur x21, [x29, [[SLOT:#-[0-9]+]]]
+; CHECK-O0-AARCH64: str x21, [sp, [[SLOT:#[0-9]+]]]
; CHECK-O0-AARCH64: b [[BB1:[A-Za-z0-9_]*]]
; CHECK-O0-AARCH64: [[BB1]]:
-; CHECK-O0-AARCH64: ldur x0, [x29, [[SLOT]]]
+; CHECK-O0-AARCH64: ldr x0, [sp, [[SLOT]]]
; CHECK-O0-AARCH64: str x0, [sp, [[SLOT2:#[0-9]+]]]
; CHECK-O0-AARCH64: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]]
; CHECK-O0-AARCH64: mov w{{.*}}, #16
@@ -191,10 +194,11 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
; CHECK-O0-AARCH64:[[BB2]]:
; CHECK-O0-AARCH64: ldr x0, [sp, [[SLOT2]]]
; CHECK-O0-AARCH64: fcmp
-; CHECK-O0-AARCH64: stur x0, [x29, [[SLOT]]]
+; CHECK-O0-AARCH64: str x0, [sp]
; CHECK-O0-AARCH64: b.le [[BB1]]
; reload from stack
-; CHECK-O0-AARCH64: ldr x21, [sp]
+; CHECK-O0-AARCH64: ldr [[ID3:x[0-9]+]], [sp]
+; CHECK-O0-AARCH64: mov x21, [[ID3]]
; CHECK-O0-AARCH64: ret
; CHECK-O0-ARM64_32-LABEL: foo_loop:
@@ -211,12 +215,14 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
; CHECK-O0-ARM64_32: strb w{{.*}},
; CHECK-O0-ARM64_32:[[BB2]]:
; CHECK-O0-ARM64_32: ldr x0, [sp, [[SLOT2]]]
-; CHECK-O0-ARM64_32: str x0, [sp[[OFFSET:.*]]]
; CHECK-O0-ARM64_32: fcmp
+; CHECK-O0-ARM64_32: str x0, [sp[[OFFSET:.*]]]
; CHECK-O0-ARM64_32: b.le [[BB1]]
; reload from stack
-; CHECK-O0-ARM64_32: ldr x21, [sp[[OFFSET]]]
+; CHECK-O0-ARM64_32: ldr [[ID3:x[0-9]+]], [sp[[OFFSET]]]
+; CHECK-O0-ARM64_32: mov x21, [[ID3]]
; CHECK-O0-ARM64_32: ret
+
entry:
br label %bb_loop
@@ -255,16 +261,16 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi
; CHECK-APPLE-NOT: x21
; CHECK-O0-LABEL: foo_sret:
+; CHECK-O0: mov w{{.*}}, #16
; spill x8
; CHECK-O0-DAG: str x8
-; CHECK-O0: mov w{{.*}}, #16
; CHECK-O0: malloc
-; CHECK-O0: mov x10, x0
-; CHECK-O0: mov x21, x10
; CHECK-O0: mov [[ID:w[0-9]+]], #1
-; CHECK-O0: strb [[ID]], [x10, #8]
+; CHECK-O0: strb [[ID]], [x0, #8]
; reload from stack
-; CHECK-O0: str w{{.*}}, [x8, #4]
+; CHECK-O0: ldr [[SRET:x[0-9]+]]
+; CHECK-O0: str w{{.*}}, [{{.*}}[[SRET]], #4]
+; CHECK-O0: mov x21
; CHECK-O0-NOT: x21
entry:
%call = call i8* @malloc(i64 16)
@@ -293,7 +299,7 @@ define float @caller3(i8* %error_ref) {
; CHECK-O0-LABEL: caller3:
; spill x0
-; CHECK-O0: str x0, [sp, [[OFFSET:#[0-9]+]]]
+; CHECK-O0: str x0
; CHECK-O0: mov x21
; CHECK-O0: bl {{.*}}foo_sret
; CHECK-O0: mov [[ID2:x[0-9]+]], x21
@@ -301,8 +307,8 @@ define float @caller3(i8* %error_ref) {
; CHECK-O0-ARM64_32: cmp x21, #0
; Access part of the error object and save it to error_ref
; reload from stack
-; CHECK-O0: ldr [[ID:x[0-9]+]], [sp, [[OFFSET]]]
; CHECK-O0: ldrb [[CODE:w[0-9]+]]
+; CHECK-O0: ldr [[ID:x[0-9]+]]
; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
; CHECK-O0: bl {{.*}}free
entry:
@@ -624,10 +630,11 @@ declare swiftcc void @foo2(%swift_error** swifterror)
; Make sure we properly assign registers during fast-isel.
; CHECK-O0-LABEL: testAssign
-; CHECK-O0: mov x21, xzr
+; CHECK-O0: mov [[TMP:x.*]], xzr
+; CHECK-O0: mov x21, [[TMP]]
; CHECK-O0: bl _foo2
; CHECK-O0: str x21, [s[[STK:.*]]]
-; CHECK-O0: ldr x{{[0-9]+}}, [s[[STK]]]
+; CHECK-O0: ldr x0, [s[[STK]]]
; CHECK-APPLE-LABEL: testAssign
; CHECK-APPLE: mov x21, xzr
diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
index 7642c826acff..aacc3c6542c7 100644
--- a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
+++ b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
@@ -81,14 +81,14 @@ body: |
; CHECK: frame-setup CFI_INSTRUCTION offset $b21, -240
; CHECK: frame-setup CFI_INSTRUCTION offset $b22, -256
; CHECK: frame-setup CFI_INSTRUCTION offset $b23, -272
- ; CHECK: STRQui $q0, $sp, 0 :: (store 16 into %stack.1)
; CHECK: EH_LABEL <mcsymbol .Ltmp0>
- ; CHECK: BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $q0
- ; CHECK: STRQui killed $q0, $sp, 1 :: (store 16 into %stack.0)
+ ; CHECK: STRQui $q0, $sp, 1 :: (store 16 into %stack.0)
+ ; CHECK: BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def $lr, implicit $sp, implicit killed $q0, implicit-def $q0
; CHECK: EH_LABEL <mcsymbol .Ltmp1>
+ ; CHECK: STRQui killed $q0, $sp, 0 :: (store 16 into %stack.1)
; CHECK: B %bb.1
; CHECK: bb.1..Lcontinue:
- ; CHECK: $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0)
+ ; CHECK: $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1)
; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2)
; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4)
; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6)
@@ -103,7 +103,7 @@ body: |
; CHECK: bb.2..Lunwind (landing-pad):
; CHECK: liveins: $x0, $x1
; CHECK: EH_LABEL <mcsymbol .Ltmp2>
- ; CHECK: $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1)
+ ; CHECK: $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0)
; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2)
; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4)
; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6)
diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll
index 33bbdfaa2cfd..68fec0825542 100644
--- a/llvm/test/CodeGen/AArch64/unwind-preserved.ll
+++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll
@@ -50,14 +50,14 @@ define <vscale x 4 x i32> @invoke_callee_may_throw_sve(<vscale x 4 x i32> %v) pe
; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl may_throw_sve
; CHECK-NEXT: .Ltmp1:
-; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_1: // %.Lcontinue
-; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
@@ -92,7 +92,7 @@ define <vscale x 4 x i32> @invoke_callee_may_throw_sve(<vscale x 4 x i32> %v) pe
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: // %.Lunwind
; CHECK-NEXT: .Ltmp2:
-; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
@@ -172,14 +172,14 @@ define <vscale x 4 x i32> @invoke_callee_may_throw_sve(<vscale x 4 x i32> %v) pe
; GISEL-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
; GISEL-NEXT: .cfi_offset w30, -8
; GISEL-NEXT: .cfi_offset w29, -16
-; GISEL-NEXT: str z0, [sp] // 16-byte Folded Spill
; GISEL-NEXT: .Ltmp0:
+; GISEL-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; GISEL-NEXT: bl may_throw_sve
; GISEL-NEXT: .Ltmp1:
-; GISEL-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
+; GISEL-NEXT: str z0, [sp] // 16-byte Folded Spill
; GISEL-NEXT: b .LBB0_1
; GISEL-NEXT: .LBB0_1: // %.Lcontinue
-; GISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
+; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; GISEL-NEXT: addvl sp, sp, #2
; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
@@ -214,7 +214,7 @@ define <vscale x 4 x i32> @invoke_callee_may_throw_sve(<vscale x 4 x i32> %v) pe
; GISEL-NEXT: ret
; GISEL-NEXT: .LBB0_2: // %.Lunwind
; GISEL-NEXT: .Ltmp2:
-; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload
+; GISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
; GISEL-NEXT: addvl sp, sp, #2
; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
@@ -293,14 +293,14 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v)
; CHECK-NEXT: .cfi_offset b21, -240
; CHECK-NEXT: .cfi_offset b22, -256
; CHECK-NEXT: .cfi_offset b23, -272
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: .Ltmp3:
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: bl may_throw_neon
; CHECK-NEXT: .Ltmp4:
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: b .LBB1_1
; CHECK-NEXT: .LBB1_1: // %.Lcontinue
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload
; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload
; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload
@@ -314,7 +314,7 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v)
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_2: // %.Lunwind
; CHECK-NEXT: .Ltmp5:
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload
; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload
; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload
@@ -360,13 +360,13 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v)
; GISEL-NEXT: .cfi_offset b21, -240
; GISEL-NEXT: .cfi_offset b22, -256
; GISEL-NEXT: .cfi_offset b23, -272
-; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill
; GISEL-NEXT: .Ltmp3:
-; GISEL-NEXT: bl may_throw_neon
; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; GISEL-NEXT: bl may_throw_neon
; GISEL-NEXT: .Ltmp4:
+; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill
; GISEL-NEXT: // %bb.1: // %.Lcontinue
-; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload
; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload
; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload
@@ -380,7 +380,7 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v)
; GISEL-NEXT: ret
; GISEL-NEXT: .LBB1_2: // %.Lunwind
; GISEL-NEXT: .Ltmp5:
-; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload
; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload
; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
index 6515d25f7415..587f808bc55e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
@@ -28,15 +28,15 @@ define i32 @test_sgpr_matching_constraint() nounwind {
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: s_mov_b32 s5, 7
+; CHECK-NEXT: s_mov_b32 s4, 7
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: s_mov_b32 s4, 8
+; CHECK-NEXT: s_mov_b32 s5, 8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: s_add_u32 s4, s5, s4
+; CHECK-NEXT: s_add_u32 s5, s4, s5
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_mov_b32_e32 v0, s4
+; CHECK-NEXT: v_mov_b32_e32 v0, s5
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
%asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index 22a4fc98b436..6da332a596fb 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -17,28 +17,29 @@
; GCN: s_mov_b32 m0, -1
; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
+; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
+; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
+; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
+
; Spill load
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
; Spill saved exec
-; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
; VMEM: v_writelane_b32 v[[V_SAVEEXEC:[0-9]+]], s[[SAVEEXEC_LO]], 0
; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], s[[SAVEEXEC_HI]], 1
-; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:20 ; 4-byte Folded Spill
-; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
; GCN: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: ; %bb.{{[0-9]+}}: ; %if
-; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: s_mov_b32 m0, -1
; GCN: ds_read_b32 [[LOAD1:v[0-9]+]]
+; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -52,7 +53,9 @@
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET]] ; 4-byte Folded Reload
+
+
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:20 ; 4-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1
@@ -85,26 +88,29 @@ endif:
; VGPR: workitem_private_segment_byte_size = 16{{$}}
; GCN: {{^}}; %bb.0:
-; GCN-DAG: s_mov_b32 m0, -1
-; GCN-DAG: v_mov_b32_e32 [[PTR0:v[0-9]+]], 0{{$}}
-; GCN: ds_read_b32 [[LOAD0:v[0-9]+]], [[PTR0]]
-; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0
+
+; GCN: s_mov_b32 m0, -1
+; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
+
+; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
+
+; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
+; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
; Spill load
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
+
; VMEM: v_writelane_b32 v[[V_SAVEEXEC:[0-9]+]], s[[SAVEEXEC_LO]], 0
; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], s[[SAVEEXEC_HI]], 1
-; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-
+; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:24 ; 4-byte Folded Spill
-; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
+
; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
@@ -121,7 +127,7 @@ endif:
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET]] ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:24 ; 4-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1
@@ -133,7 +139,7 @@ endif:
define amdgpu_kernel void @divergent_loop(i32 addrspace(1)* %out) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %load0 = load volatile i32, i32 addrspace(3)* null
+ %load0 = load volatile i32, i32 addrspace(3)* undef
%cmp0 = icmp eq i32 %tid, 0
br i1 %cmp0, label %loop, label %end
@@ -155,12 +161,8 @@ end:
; GCN-LABEL: {{^}}divergent_if_else_endif:
; GCN: {{^}}; %bb.0:
-; GCN-DAG: s_mov_b32 m0, -1
-; GCN-DAG: v_mov_b32_e32 [[PTR0:v[0-9]+]], 0{{$}}
-; GCN: ds_read_b32 [[LOAD0:v[0-9]+]], [[PTR0]]
-
-; Spill load
-; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; GCN: s_mov_b32 m0, -1
+; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
; GCN: s_mov_b32 [[ZERO:s[0-9]+]], 0
; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], [[ZERO]], v0
@@ -169,6 +171,9 @@ end:
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
+; Spill load
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+
; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
@@ -187,6 +192,7 @@ end:
; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
+
; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[SAVEEXEC_OFFSET]]
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 0
@@ -213,8 +219,8 @@ end:
; GCN: ; %bb.{{[0-9]+}}: ; %if
-; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: ds_read_b32
+; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
; GCN: buffer_store_dword [[ADD]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill
; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
@@ -242,7 +248,7 @@ end:
define amdgpu_kernel void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %load0 = load volatile i32, i32 addrspace(3)* null
+ %load0 = load volatile i32, i32 addrspace(3)* undef
%cmp0 = icmp eq i32 %tid, 0
br i1 %cmp0, label %if, label %else
diff --git a/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir b/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir
deleted file mode 100644
index d50de439d47d..000000000000
--- a/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir
+++ /dev/null
@@ -1,62 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck %s
-
-# Make sure incorrect kills aren't emitted on vcc
-
----
-name: foo
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
- scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
- stackPtrOffsetReg: '$sgpr32'
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; CHECK-LABEL: name: foo
- ; CHECK: liveins: $vgpr0
- ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
- ; CHECK: $sgpr4_sgpr5 = COPY $vcc
- ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $vcc, implicit $exec
- ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed $sgpr4_sgpr5
- %0:vgpr_32 = COPY $vgpr0
- V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec
- $sgpr4_sgpr5 = COPY $vcc
- %1:sreg_64_xexec = COPY $vcc
- %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec
- $vgpr0 = COPY %2
- S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5
-
-...
-
-# This would hit "Unexpected reg unit state" assert.
----
-name: bar
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
- scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
- stackPtrOffsetReg: '$sgpr32'
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; CHECK-LABEL: name: bar
- ; CHECK: liveins: $vgpr0
- ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
- ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc
- ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)
- ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc
- ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5)
- ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec
- ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc
- %0:vgpr_32 = COPY $vgpr0
- V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec
- %3:sreg_64_xexec = COPY $vcc
- %1:sreg_64_xexec = COPY $vcc
- %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec
- $vgpr0 = COPY %2
- S_ENDPGM 0, implicit $vgpr0, implicit %3
-
-...
diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir
deleted file mode 100644
index bf32ebaf473d..000000000000
--- a/llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir
+++ /dev/null
@@ -1,27 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s
-
-# This would hit "Illegal subregister index for physical register" verifier error since
-# tied operands would skip dropping the subregister index.
-
----
-name: invalid_subreg_index
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
-body: |
- bb.0:
- liveins: $vgpr0, $sgpr0
-
- ; CHECK-LABEL: name: invalid_subreg_index
- ; CHECK: liveins: $vgpr0, $sgpr0
- ; CHECK: $m0 = COPY renamable $sgpr0
- ; CHECK: undef renamable $vgpr1 = V_INTERP_P2_F32 undef $vgpr1, undef $vgpr0, 0, 1, implicit $mode, implicit $m0, implicit $exec, implicit-def dead $vgpr0_vgpr1
- ; CHECK: S_ENDPGM 0, implicit killed renamable $sgpr0
- %0:vgpr_32 = COPY $vgpr0
- %1:sgpr_32 = COPY $sgpr0
- $m0 = COPY %1
- undef %2.sub1:vreg_64 = V_INTERP_P2_F32 undef %2.sub1, undef %0:vgpr_32, 0, 1, implicit $mode, implicit $m0, implicit $exec
- S_ENDPGM 0, implicit %1
-
-...
diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
index 7e70eb3a952c..32de26283781 100644
--- a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
+++ b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
@@ -18,7 +18,7 @@ body: |
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
@@ -53,10 +53,9 @@ body: |
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
- ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
@@ -93,10 +92,9 @@ body: |
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
- ; GCN: renamable $vgpr2 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
- ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec
+ ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
+ ; GCN: $vgpr1_vgpr2 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
+ ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
@@ -130,9 +128,9 @@ body: |
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec
- ; GCN: renamable $vgpr0 = V_ADD_U32_e64 1, 1, 0, implicit $exec
- ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+ ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec
+ ; GCN: renamable $vgpr2 = V_ADD_U32_e64 1, 1, 0, implicit $exec
+ ; GCN: SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
@@ -166,8 +164,9 @@ body: |
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
- ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def dead $vgpr2_vgpr3
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr1, 0, 0, 0, 0, implicit $exec
+ ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr2_vgpr3
+ ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr3, 0, 0, 0, 0, implicit $exec
+ ; GCN: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.1, align 4, addrspace 5)
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
index c6ba50706812..3d3b511ab34b 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
@@ -12,96 +12,101 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
; GCN: bb.0.entry:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0, $sgpr0_sgpr1
- ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
- ; GCN: renamable $sgpr6 = COPY renamable $sgpr1
+ ; GCN: renamable $sgpr2 = COPY renamable $sgpr1
; GCN: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
- ; GCN: renamable $sgpr4 = S_MOV_B32 61440
- ; GCN: renamable $sgpr5 = S_MOV_B32 -1
- ; GCN: undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: renamable $sgpr1 = COPY killed renamable $sgpr6
- ; GCN: renamable $sgpr2 = COPY killed renamable $sgpr5
- ; GCN: renamable $sgpr3 = COPY killed renamable $sgpr4
- ; GCN: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.2, align 4, addrspace 5)
+ ; GCN: renamable $sgpr1 = S_MOV_B32 61440
+ ; GCN: renamable $sgpr3 = S_MOV_B32 -1
+ ; GCN: undef renamable $sgpr4 = COPY killed renamable $sgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+ ; GCN: renamable $sgpr5 = COPY killed renamable $sgpr2
+ ; GCN: renamable $sgpr6 = COPY killed renamable $sgpr3
+ ; GCN: renamable $sgpr7 = COPY killed renamable $sgpr1
; GCN: renamable $sgpr0 = S_MOV_B32 16
; GCN: renamable $sgpr1 = S_MOV_B32 15
; GCN: renamable $sgpr2 = S_MOV_B32 14
; GCN: renamable $sgpr3 = S_MOV_B32 13
- ; GCN: renamable $sgpr4 = S_MOV_B32 12
- ; GCN: renamable $sgpr5 = S_MOV_B32 11
- ; GCN: renamable $sgpr6 = S_MOV_B32 10
- ; GCN: renamable $sgpr7 = S_MOV_B32 9
- ; GCN: renamable $sgpr8 = S_MOV_B32 8
- ; GCN: renamable $sgpr9 = S_MOV_B32 7
- ; GCN: renamable $sgpr10 = S_MOV_B32 6
- ; GCN: renamable $sgpr11 = S_MOV_B32 5
- ; GCN: renamable $sgpr12 = S_MOV_B32 3
- ; GCN: renamable $sgpr13 = S_MOV_B32 2
- ; GCN: renamable $sgpr14 = S_MOV_B32 1
- ; GCN: renamable $sgpr15 = S_MOV_B32 0
- ; GCN: renamable $vgpr0 = COPY killed renamable $sgpr15
- ; GCN: renamable $vgpr30 = COPY killed renamable $sgpr14
- ; GCN: renamable $vgpr29 = COPY killed renamable $sgpr13
- ; GCN: renamable $vgpr28 = COPY killed renamable $sgpr12
- ; GCN: renamable $vgpr27 = COPY killed renamable $sgpr11
- ; GCN: renamable $vgpr26 = COPY killed renamable $sgpr10
- ; GCN: renamable $vgpr25 = COPY killed renamable $sgpr9
- ; GCN: renamable $vgpr24 = COPY killed renamable $sgpr8
- ; GCN: renamable $vgpr23 = COPY killed renamable $sgpr7
- ; GCN: renamable $vgpr22 = COPY killed renamable $sgpr6
- ; GCN: renamable $vgpr21 = COPY killed renamable $sgpr5
- ; GCN: renamable $vgpr20 = COPY killed renamable $sgpr4
- ; GCN: renamable $vgpr19 = COPY killed renamable $sgpr3
- ; GCN: renamable $vgpr18 = COPY killed renamable $sgpr2
- ; GCN: renamable $vgpr17 = COPY killed renamable $sgpr1
+ ; GCN: renamable $sgpr8 = S_MOV_B32 12
+ ; GCN: renamable $sgpr9 = S_MOV_B32 11
+ ; GCN: renamable $sgpr10 = S_MOV_B32 10
+ ; GCN: renamable $sgpr11 = S_MOV_B32 9
+ ; GCN: renamable $sgpr12 = S_MOV_B32 8
+ ; GCN: renamable $sgpr13 = S_MOV_B32 7
+ ; GCN: renamable $sgpr14 = S_MOV_B32 6
+ ; GCN: renamable $sgpr15 = S_MOV_B32 5
+ ; GCN: renamable $sgpr16 = S_MOV_B32 3
+ ; GCN: renamable $sgpr17 = S_MOV_B32 2
+ ; GCN: renamable $sgpr18 = S_MOV_B32 1
+ ; GCN: renamable $sgpr19 = S_MOV_B32 0
+ ; GCN: renamable $vgpr1 = COPY killed renamable $sgpr19
+ ; GCN: renamable $vgpr2 = COPY killed renamable $sgpr18
+ ; GCN: renamable $vgpr3 = COPY killed renamable $sgpr17
+ ; GCN: renamable $vgpr4 = COPY killed renamable $sgpr16
+ ; GCN: renamable $vgpr5 = COPY killed renamable $sgpr15
+ ; GCN: renamable $vgpr6 = COPY killed renamable $sgpr14
+ ; GCN: renamable $vgpr7 = COPY killed renamable $sgpr13
+ ; GCN: renamable $vgpr8 = COPY killed renamable $sgpr12
+ ; GCN: renamable $vgpr9 = COPY killed renamable $sgpr11
+ ; GCN: renamable $vgpr10 = COPY killed renamable $sgpr10
+ ; GCN: renamable $vgpr11 = COPY killed renamable $sgpr9
+ ; GCN: renamable $vgpr12 = COPY killed renamable $sgpr8
+ ; GCN: renamable $vgpr13 = COPY killed renamable $sgpr3
+ ; GCN: renamable $vgpr14 = COPY killed renamable $sgpr2
+ ; GCN: renamable $vgpr15 = COPY killed renamable $sgpr1
; GCN: renamable $vgpr16 = COPY killed renamable $sgpr0
- ; GCN: undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; GCN: renamable $vgpr1 = COPY killed renamable $vgpr30
- ; GCN: renamable $vgpr2 = COPY killed renamable $vgpr29
- ; GCN: renamable $vgpr3 = COPY killed renamable $vgpr28
- ; GCN: renamable $vgpr4 = COPY killed renamable $vgpr27
- ; GCN: renamable $vgpr5 = COPY killed renamable $vgpr26
- ; GCN: renamable $vgpr6 = COPY killed renamable $vgpr25
- ; GCN: renamable $vgpr7 = COPY killed renamable $vgpr24
- ; GCN: renamable $vgpr8 = COPY killed renamable $vgpr23
- ; GCN: renamable $vgpr9 = COPY killed renamable $vgpr22
- ; GCN: renamable $vgpr10 = COPY killed renamable $vgpr21
- ; GCN: renamable $vgpr11 = COPY killed renamable $vgpr20
- ; GCN: renamable $vgpr12 = COPY killed renamable $vgpr19
- ; GCN: renamable $vgpr13 = COPY killed renamable $vgpr18
- ; GCN: renamable $vgpr14 = COPY killed renamable $vgpr17
- ; GCN: renamable $vgpr15 = COPY killed renamable $vgpr16
- ; GCN: SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.1, align 4, addrspace 5)
+ ; GCN: undef renamable $vgpr17 = COPY killed renamable $vgpr1, implicit-def $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32
+ ; GCN: renamable $vgpr18 = COPY killed renamable $vgpr2
+ ; GCN: renamable $vgpr19 = COPY killed renamable $vgpr3
+ ; GCN: renamable $vgpr20 = COPY killed renamable $vgpr4
+ ; GCN: renamable $vgpr21 = COPY killed renamable $vgpr5
+ ; GCN: renamable $vgpr22 = COPY killed renamable $vgpr6
+ ; GCN: renamable $vgpr23 = COPY killed renamable $vgpr7
+ ; GCN: renamable $vgpr24 = COPY killed renamable $vgpr8
+ ; GCN: renamable $vgpr25 = COPY killed renamable $vgpr9
+ ; GCN: renamable $vgpr26 = COPY killed renamable $vgpr10
+ ; GCN: renamable $vgpr27 = COPY killed renamable $vgpr11
+ ; GCN: renamable $vgpr28 = COPY killed renamable $vgpr12
+ ; GCN: renamable $vgpr29 = COPY killed renamable $vgpr13
+ ; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14
+ ; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15
+ ; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16
; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
- ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)
- ; GCN: renamable $vgpr0 = IMPLICIT_DEF
- ; GCN: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; GCN: renamable $vgpr1 = IMPLICIT_DEF
+ ; GCN: renamable $sgpr2_sgpr3 = IMPLICIT_DEF
+ ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5)
+ ; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5)
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5)
+ ; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000)
- ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.4, align 4, addrspace 5)
- ; GCN: $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.5, addrspace 5)
- ; GCN: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 64 from %stack.1, align 4, addrspace 5)
- ; GCN: $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
- ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec
- ; GCN: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec
- ; GCN: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5)
+ ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
+ ; GCN: $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+ ; GCN: renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 $sgpr2, killed $vgpr1, implicit $exec
+ ; GCN: renamable $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec
; GCN: S_SET_GPR_IDX_ON killed renamable $sgpr2, 1, implicit-def $m0, implicit-def undef $mode, implicit $m0, implicit $mode
- ; GCN: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
- ; GCN: SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5)
+ ; GCN: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = SI_SPILL_V512_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 64 from %stack.2, align 4, addrspace 5)
+ ; GCN: renamable $vgpr18 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0
; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
- ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5)
- ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1
- ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.4, align 4, addrspace 5)
- ; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
+ ; GCN: renamable $vgpr19 = COPY renamable $vgpr18
+ ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5)
+ ; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
+ ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
+ ; GCN: SI_SPILL_V32_SAVE killed $vgpr18, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; GCN: bb.3:
; GCN: successors: %bb.2(0x80000000)
- ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5)
- ; GCN: $exec = S_MOV_B64 renamable $sgpr0_sgpr1
+ ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.3, align 4, addrspace 5)
+ ; GCN: $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1
; GCN: bb.2:
- ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.6, addrspace 5)
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 16 from %stack.2, align 4, addrspace 5)
- ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
+ ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 16 from %stack.1, align 4, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
; GCN: S_ENDPGM 0
entry:
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 4dfc9bce69aa..230cd8eb5b0d 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -236,18 +236,17 @@ entry:
; W64-O0-DAG: s_mov_b32 [[IDX_S:s[0-9]+]], s{{[0-9]+}}
; W64-O0-DAG: v_mov_b32_e32 [[IDX_V:v[0-9]+]], s{{[0-9]+}}
; W64-O0-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
-; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
+; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
-; W64-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1
-; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; W64-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]:
+; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; W64-O0: s_waitcnt vmcnt(0)
-; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]]
+; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP0:[0-9]+]], v[[VRSRC0]]
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]]
-; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]]
+; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[SRSRCTMP0]]
; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]]
; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]]
@@ -256,37 +255,37 @@ entry:
; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]]
; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
+; W64-O0-DAG: s_mov_b32 s[[S0:[0-9]+]], s[[SRSRCTMP0]]
; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]]
; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]]
; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]]
; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
+; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF]] ; 4-byte Folded Reload
; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[S0]]:[[S3]]{{\]}}, {{.*}} idxen
; W64-O0: s_waitcnt vmcnt(0)
; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
; W64-O0: s_xor_b64 exec, exec, [[SAVE]]
; W64-O0-NEXT: s_cbranch_execnz [[LOOPBB0]]
-
-; XXX-W64-O0: s_mov_b64 exec, [[SAVEEXEC]]
+; CHECK-O0: s_mov_b64 exec, [[SAVEEXEC]]
; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
; W64-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
-; W64-O0: ; %bb.{{[0-9]+}}: ; %bb1
-; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
+; W64-O0: ; %bb.{{[0-9]+}}:
; W64-O0-DAG: s_mov_b64 s{{\[}}[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]{{\]}}, exec
+; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]]
-; W64-O0: v_writelane_b32 [[VSAVEEXEC]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]]
+; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]]
-; W64-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1
-; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF]] ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]:
+; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; W64-O0: s_waitcnt vmcnt(0)
-; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]]
+; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP0:[0-9]+]], v[[VRSRC0]]
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]]
-; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]]
+; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[SRSRCTMP0]]
; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]]
; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]]
@@ -295,10 +294,12 @@ entry:
; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]]
; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
+; W64-O0-DAG: s_mov_b32 s[[S0:[0-9]+]], s[[SRSRCTMP0]]
; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]]
; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]]
; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]]
; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
+; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF]] ; 4-byte Folded Reload
; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[S0]]:[[S3]]{{\]}}, {{.*}} idxen
; W64-O0: s_waitcnt vmcnt(0)
; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index dccee0a298a3..b119ffd303e0 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -15,379 +15,381 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out,
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 0
-; GCN-NEXT: v_writelane_b32 v0, s5, 1
-; GCN-NEXT: v_writelane_b32 v0, s6, 2
-; GCN-NEXT: v_writelane_b32 v0, s7, 3
-; GCN-NEXT: v_writelane_b32 v0, s8, 4
-; GCN-NEXT: v_writelane_b32 v0, s9, 5
-; GCN-NEXT: v_writelane_b32 v0, s10, 6
-; GCN-NEXT: v_writelane_b32 v0, s11, 7
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 8
-; GCN-NEXT: v_writelane_b32 v0, s5, 9
-; GCN-NEXT: v_writelane_b32 v0, s6, 10
-; GCN-NEXT: v_writelane_b32 v0, s7, 11
-; GCN-NEXT: v_writelane_b32 v0, s8, 12
-; GCN-NEXT: v_writelane_b32 v0, s9, 13
-; GCN-NEXT: v_writelane_b32 v0, s10, 14
-; GCN-NEXT: v_writelane_b32 v0, s11, 15
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 16
-; GCN-NEXT: v_writelane_b32 v0, s5, 17
-; GCN-NEXT: v_writelane_b32 v0, s6, 18
-; GCN-NEXT: v_writelane_b32 v0, s7, 19
-; GCN-NEXT: v_writelane_b32 v0, s8, 20
-; GCN-NEXT: v_writelane_b32 v0, s9, 21
-; GCN-NEXT: v_writelane_b32 v0, s10, 22
-; GCN-NEXT: v_writelane_b32 v0, s11, 23
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ; def s[12:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 24
-; GCN-NEXT: v_writelane_b32 v0, s5, 25
-; GCN-NEXT: v_writelane_b32 v0, s6, 26
-; GCN-NEXT: v_writelane_b32 v0, s7, 27
-; GCN-NEXT: v_writelane_b32 v0, s8, 28
-; GCN-NEXT: v_writelane_b32 v0, s9, 29
-; GCN-NEXT: v_writelane_b32 v0, s10, 30
-; GCN-NEXT: v_writelane_b32 v0, s11, 31
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ; def s[20:27]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 32
-; GCN-NEXT: v_writelane_b32 v0, s5, 33
-; GCN-NEXT: v_writelane_b32 v0, s6, 34
-; GCN-NEXT: v_writelane_b32 v0, s7, 35
-; GCN-NEXT: v_writelane_b32 v0, s8, 36
-; GCN-NEXT: v_writelane_b32 v0, s9, 37
-; GCN-NEXT: v_writelane_b32 v0, s10, 38
-; GCN-NEXT: v_writelane_b32 v0, s11, 39
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ; def s[36:43]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 40
-; GCN-NEXT: v_writelane_b32 v0, s5, 41
-; GCN-NEXT: v_writelane_b32 v0, s6, 42
-; GCN-NEXT: v_writelane_b32 v0, s7, 43
-; GCN-NEXT: v_writelane_b32 v0, s8, 44
-; GCN-NEXT: v_writelane_b32 v0, s9, 45
-; GCN-NEXT: v_writelane_b32 v0, s10, 46
-; GCN-NEXT: v_writelane_b32 v0, s11, 47
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ; def s[44:51]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 48
-; GCN-NEXT: v_writelane_b32 v0, s5, 49
-; GCN-NEXT: v_writelane_b32 v0, s6, 50
-; GCN-NEXT: v_writelane_b32 v0, s7, 51
-; GCN-NEXT: v_writelane_b32 v0, s8, 52
-; GCN-NEXT: v_writelane_b32 v0, s9, 53
-; GCN-NEXT: v_writelane_b32 v0, s10, 54
-; GCN-NEXT: v_writelane_b32 v0, s11, 55
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 56
-; GCN-NEXT: v_writelane_b32 v0, s5, 57
-; GCN-NEXT: v_writelane_b32 v0, s6, 58
-; GCN-NEXT: v_writelane_b32 v0, s7, 59
-; GCN-NEXT: v_writelane_b32 v0, s8, 60
-; GCN-NEXT: v_writelane_b32 v0, s9, 61
-; GCN-NEXT: v_writelane_b32 v0, s10, 62
-; GCN-NEXT: v_writelane_b32 v0, s11, 63
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ; def s[52:59]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 0
-; GCN-NEXT: v_writelane_b32 v1, s5, 1
-; GCN-NEXT: v_writelane_b32 v1, s6, 2
-; GCN-NEXT: v_writelane_b32 v1, s7, 3
-; GCN-NEXT: v_writelane_b32 v1, s8, 4
-; GCN-NEXT: v_writelane_b32 v1, s9, 5
-; GCN-NEXT: v_writelane_b32 v1, s10, 6
-; GCN-NEXT: v_writelane_b32 v1, s11, 7
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 8
-; GCN-NEXT: v_writelane_b32 v1, s5, 9
-; GCN-NEXT: v_writelane_b32 v1, s6, 10
-; GCN-NEXT: v_writelane_b32 v1, s7, 11
-; GCN-NEXT: v_writelane_b32 v1, s8, 12
-; GCN-NEXT: v_writelane_b32 v1, s9, 13
-; GCN-NEXT: v_writelane_b32 v1, s10, 14
-; GCN-NEXT: v_writelane_b32 v1, s11, 15
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 16
-; GCN-NEXT: v_writelane_b32 v1, s5, 17
-; GCN-NEXT: v_writelane_b32 v1, s6, 18
-; GCN-NEXT: v_writelane_b32 v1, s7, 19
-; GCN-NEXT: v_writelane_b32 v1, s8, 20
-; GCN-NEXT: v_writelane_b32 v1, s9, 21
-; GCN-NEXT: v_writelane_b32 v1, s10, 22
-; GCN-NEXT: v_writelane_b32 v1, s11, 23
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 24
-; GCN-NEXT: v_writelane_b32 v1, s5, 25
-; GCN-NEXT: v_writelane_b32 v1, s6, 26
-; GCN-NEXT: v_writelane_b32 v1, s7, 27
-; GCN-NEXT: v_writelane_b32 v1, s8, 28
-; GCN-NEXT: v_writelane_b32 v1, s9, 29
-; GCN-NEXT: v_writelane_b32 v1, s10, 30
-; GCN-NEXT: v_writelane_b32 v1, s11, 31
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 32
-; GCN-NEXT: v_writelane_b32 v1, s5, 33
-; GCN-NEXT: v_writelane_b32 v1, s6, 34
-; GCN-NEXT: v_writelane_b32 v1, s7, 35
-; GCN-NEXT: v_writelane_b32 v1, s8, 36
-; GCN-NEXT: v_writelane_b32 v1, s9, 37
-; GCN-NEXT: v_writelane_b32 v1, s10, 38
-; GCN-NEXT: v_writelane_b32 v1, s11, 39
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ; def s[60:67]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 40
-; GCN-NEXT: v_writelane_b32 v1, s5, 41
-; GCN-NEXT: v_writelane_b32 v1, s6, 42
-; GCN-NEXT: v_writelane_b32 v1, s7, 43
-; GCN-NEXT: v_writelane_b32 v1, s8, 44
-; GCN-NEXT: v_writelane_b32 v1, s9, 45
-; GCN-NEXT: v_writelane_b32 v1, s10, 46
-; GCN-NEXT: v_writelane_b32 v1, s11, 47
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ; def s[68:75]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 48
-; GCN-NEXT: v_writelane_b32 v1, s5, 49
-; GCN-NEXT: v_writelane_b32 v1, s6, 50
-; GCN-NEXT: v_writelane_b32 v1, s7, 51
-; GCN-NEXT: v_writelane_b32 v1, s8, 52
-; GCN-NEXT: v_writelane_b32 v1, s9, 53
-; GCN-NEXT: v_writelane_b32 v1, s10, 54
-; GCN-NEXT: v_writelane_b32 v1, s11, 55
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ; def s[76:83]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 56
-; GCN-NEXT: v_writelane_b32 v1, s5, 57
-; GCN-NEXT: v_writelane_b32 v1, s6, 58
-; GCN-NEXT: v_writelane_b32 v1, s7, 59
-; GCN-NEXT: v_writelane_b32 v1, s8, 60
-; GCN-NEXT: v_writelane_b32 v1, s9, 61
-; GCN-NEXT: v_writelane_b32 v1, s10, 62
-; GCN-NEXT: v_writelane_b32 v1, s11, 63
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ; def s[84:91]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v2, s4, 0
-; GCN-NEXT: v_writelane_b32 v2, s5, 1
-; GCN-NEXT: v_writelane_b32 v2, s6, 2
-; GCN-NEXT: v_writelane_b32 v2, s7, 3
-; GCN-NEXT: v_writelane_b32 v2, s8, 4
-; GCN-NEXT: v_writelane_b32 v2, s9, 5
-; GCN-NEXT: v_writelane_b32 v2, s10, 6
-; GCN-NEXT: v_writelane_b32 v2, s11, 7
-; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_cmp_lg_u32 s0, s1
+; GCN-NEXT: v_writelane_b32 v0, s0, 0
+; GCN-NEXT: v_writelane_b32 v0, s4, 1
+; GCN-NEXT: v_writelane_b32 v0, s5, 2
+; GCN-NEXT: v_writelane_b32 v0, s6, 3
+; GCN-NEXT: v_writelane_b32 v0, s7, 4
+; GCN-NEXT: v_writelane_b32 v0, s8, 5
+; GCN-NEXT: v_writelane_b32 v0, s9, 6
+; GCN-NEXT: v_writelane_b32 v0, s10, 7
+; GCN-NEXT: v_writelane_b32 v0, s11, 8
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:7]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s0, 9
+; GCN-NEXT: v_writelane_b32 v0, s1, 10
+; GCN-NEXT: v_writelane_b32 v0, s2, 11
+; GCN-NEXT: v_writelane_b32 v0, s3, 12
+; GCN-NEXT: v_writelane_b32 v0, s4, 13
+; GCN-NEXT: v_writelane_b32 v0, s5, 14
+; GCN-NEXT: v_writelane_b32 v0, s6, 15
+; GCN-NEXT: v_writelane_b32 v0, s7, 16
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:7]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s0, 17
+; GCN-NEXT: v_writelane_b32 v0, s1, 18
+; GCN-NEXT: v_writelane_b32 v0, s2, 19
+; GCN-NEXT: v_writelane_b32 v0, s3, 20
+; GCN-NEXT: v_writelane_b32 v0, s4, 21
+; GCN-NEXT: v_writelane_b32 v0, s5, 22
+; GCN-NEXT: v_writelane_b32 v0, s6, 23
+; GCN-NEXT: v_writelane_b32 v0, s7, 24
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:7]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s0, 25
+; GCN-NEXT: v_writelane_b32 v0, s1, 26
+; GCN-NEXT: v_writelane_b32 v0, s2, 27
+; GCN-NEXT: v_writelane_b32 v0, s3, 28
+; GCN-NEXT: v_writelane_b32 v0, s4, 29
+; GCN-NEXT: v_writelane_b32 v0, s5, 30
+; GCN-NEXT: v_writelane_b32 v0, s6, 31
+; GCN-NEXT: v_writelane_b32 v0, s7, 32
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:7]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s0, 33
+; GCN-NEXT: v_writelane_b32 v0, s1, 34
+; GCN-NEXT: v_writelane_b32 v0, s2, 35
+; GCN-NEXT: v_writelane_b32 v0, s3, 36
+; GCN-NEXT: v_writelane_b32 v0, s4, 37
+; GCN-NEXT: v_writelane_b32 v0, s5, 38
+; GCN-NEXT: v_writelane_b32 v0, s6, 39
+; GCN-NEXT: v_writelane_b32 v0, s7, 40
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:7]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s0, 41
+; GCN-NEXT: v_writelane_b32 v0, s1, 42
+; GCN-NEXT: v_writelane_b32 v0, s2, 43
+; GCN-NEXT: v_writelane_b32 v0, s3, 44
+; GCN-NEXT: v_writelane_b32 v0, s4, 45
+; GCN-NEXT: v_writelane_b32 v0, s5, 46
+; GCN-NEXT: v_writelane_b32 v0, s6, 47
+; GCN-NEXT: v_writelane_b32 v0, s7, 48
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:7]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s0, 49
+; GCN-NEXT: v_writelane_b32 v0, s1, 50
+; GCN-NEXT: v_writelane_b32 v0, s2, 51
+; GCN-NEXT: v_writelane_b32 v0, s3, 52
+; GCN-NEXT: v_writelane_b32 v0, s4, 53
+; GCN-NEXT: v_writelane_b32 v0, s5, 54
+; GCN-NEXT: v_writelane_b32 v0, s6, 55
+; GCN-NEXT: v_writelane_b32 v0, s7, 56
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:7]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s8, 0
+; GCN-NEXT: v_readlane_b32 s9, v0, 0
+; GCN-NEXT: s_cmp_lg_u32 s9, s8
+; GCN-NEXT: v_writelane_b32 v0, s12, 57
+; GCN-NEXT: v_writelane_b32 v0, s13, 58
+; GCN-NEXT: v_writelane_b32 v0, s14, 59
+; GCN-NEXT: v_writelane_b32 v0, s15, 60
+; GCN-NEXT: v_writelane_b32 v0, s16, 61
+; GCN-NEXT: v_writelane_b32 v0, s17, 62
+; GCN-NEXT: v_writelane_b32 v0, s18, 63
+; GCN-NEXT: v_writelane_b32 v1, s19, 0
+; GCN-NEXT: v_writelane_b32 v1, s20, 1
+; GCN-NEXT: v_writelane_b32 v1, s21, 2
+; GCN-NEXT: v_writelane_b32 v1, s22, 3
+; GCN-NEXT: v_writelane_b32 v1, s23, 4
+; GCN-NEXT: v_writelane_b32 v1, s24, 5
+; GCN-NEXT: v_writelane_b32 v1, s25, 6
+; GCN-NEXT: v_writelane_b32 v1, s26, 7
+; GCN-NEXT: v_writelane_b32 v1, s27, 8
+; GCN-NEXT: v_writelane_b32 v1, s36, 9
+; GCN-NEXT: v_writelane_b32 v1, s37, 10
+; GCN-NEXT: v_writelane_b32 v1, s38, 11
+; GCN-NEXT: v_writelane_b32 v1, s39, 12
+; GCN-NEXT: v_writelane_b32 v1, s40, 13
+; GCN-NEXT: v_writelane_b32 v1, s41, 14
+; GCN-NEXT: v_writelane_b32 v1, s42, 15
+; GCN-NEXT: v_writelane_b32 v1, s43, 16
+; GCN-NEXT: v_writelane_b32 v1, s44, 17
+; GCN-NEXT: v_writelane_b32 v1, s45, 18
+; GCN-NEXT: v_writelane_b32 v1, s46, 19
+; GCN-NEXT: v_writelane_b32 v1, s47, 20
+; GCN-NEXT: v_writelane_b32 v1, s48, 21
+; GCN-NEXT: v_writelane_b32 v1, s49, 22
+; GCN-NEXT: v_writelane_b32 v1, s50, 23
+; GCN-NEXT: v_writelane_b32 v1, s51, 24
+; GCN-NEXT: v_writelane_b32 v1, s52, 25
+; GCN-NEXT: v_writelane_b32 v1, s53, 26
+; GCN-NEXT: v_writelane_b32 v1, s54, 27
+; GCN-NEXT: v_writelane_b32 v1, s55, 28
+; GCN-NEXT: v_writelane_b32 v1, s56, 29
+; GCN-NEXT: v_writelane_b32 v1, s57, 30
+; GCN-NEXT: v_writelane_b32 v1, s58, 31
+; GCN-NEXT: v_writelane_b32 v1, s59, 32
+; GCN-NEXT: v_writelane_b32 v1, s60, 33
+; GCN-NEXT: v_writelane_b32 v1, s61, 34
+; GCN-NEXT: v_writelane_b32 v1, s62, 35
+; GCN-NEXT: v_writelane_b32 v1, s63, 36
+; GCN-NEXT: v_writelane_b32 v1, s64, 37
+; GCN-NEXT: v_writelane_b32 v1, s65, 38
+; GCN-NEXT: v_writelane_b32 v1, s66, 39
+; GCN-NEXT: v_writelane_b32 v1, s67, 40
+; GCN-NEXT: v_writelane_b32 v1, s68, 41
+; GCN-NEXT: v_writelane_b32 v1, s69, 42
+; GCN-NEXT: v_writelane_b32 v1, s70, 43
+; GCN-NEXT: v_writelane_b32 v1, s71, 44
+; GCN-NEXT: v_writelane_b32 v1, s72, 45
+; GCN-NEXT: v_writelane_b32 v1, s73, 46
+; GCN-NEXT: v_writelane_b32 v1, s74, 47
+; GCN-NEXT: v_writelane_b32 v1, s75, 48
+; GCN-NEXT: v_writelane_b32 v1, s76, 49
+; GCN-NEXT: v_writelane_b32 v1, s77, 50
+; GCN-NEXT: v_writelane_b32 v1, s78, 51
+; GCN-NEXT: v_writelane_b32 v1, s79, 52
+; GCN-NEXT: v_writelane_b32 v1, s80, 53
+; GCN-NEXT: v_writelane_b32 v1, s81, 54
+; GCN-NEXT: v_writelane_b32 v1, s82, 55
+; GCN-NEXT: v_writelane_b32 v1, s83, 56
+; GCN-NEXT: v_writelane_b32 v1, s84, 57
+; GCN-NEXT: v_writelane_b32 v1, s85, 58
+; GCN-NEXT: v_writelane_b32 v1, s86, 59
+; GCN-NEXT: v_writelane_b32 v1, s87, 60
+; GCN-NEXT: v_writelane_b32 v1, s88, 61
+; GCN-NEXT: v_writelane_b32 v1, s89, 62
+; GCN-NEXT: v_writelane_b32 v1, s90, 63
+; GCN-NEXT: v_writelane_b32 v2, s91, 0
+; GCN-NEXT: v_writelane_b32 v2, s0, 1
+; GCN-NEXT: v_writelane_b32 v2, s1, 2
+; GCN-NEXT: v_writelane_b32 v2, s2, 3
+; GCN-NEXT: v_writelane_b32 v2, s3, 4
+; GCN-NEXT: v_writelane_b32 v2, s4, 5
+; GCN-NEXT: v_writelane_b32 v2, s5, 6
+; GCN-NEXT: v_writelane_b32 v2, s6, 7
+; GCN-NEXT: v_writelane_b32 v2, s7, 8
; GCN-NEXT: s_cbranch_scc1 BB0_2
; GCN-NEXT: ; %bb.1: ; %bb0
-; GCN-NEXT: v_readlane_b32 s8, v1, 56
-; GCN-NEXT: v_readlane_b32 s9, v1, 57
-; GCN-NEXT: v_readlane_b32 s10, v1, 58
-; GCN-NEXT: v_readlane_b32 s11, v1, 59
-; GCN-NEXT: v_readlane_b32 s12, v1, 60
-; GCN-NEXT: v_readlane_b32 s13, v1, 61
-; GCN-NEXT: v_readlane_b32 s14, v1, 62
-; GCN-NEXT: v_readlane_b32 s15, v1, 63
-; GCN-NEXT: v_readlane_b32 s16, v1, 48
-; GCN-NEXT: v_readlane_b32 s17, v1, 49
-; GCN-NEXT: v_readlane_b32 s18, v1, 50
-; GCN-NEXT: v_readlane_b32 s19, v1, 51
-; GCN-NEXT: v_readlane_b32 s20, v1, 52
-; GCN-NEXT: v_readlane_b32 s21, v1, 53
-; GCN-NEXT: v_readlane_b32 s22, v1, 54
-; GCN-NEXT: v_readlane_b32 s23, v1, 55
-; GCN-NEXT: v_readlane_b32 s24, v1, 40
-; GCN-NEXT: v_readlane_b32 s25, v1, 41
-; GCN-NEXT: v_readlane_b32 s26, v1, 42
-; GCN-NEXT: v_readlane_b32 s27, v1, 43
-; GCN-NEXT: v_readlane_b32 s28, v1, 44
-; GCN-NEXT: v_readlane_b32 s29, v1, 45
-; GCN-NEXT: v_readlane_b32 s30, v1, 46
-; GCN-NEXT: v_readlane_b32 s31, v1, 47
-; GCN-NEXT: v_readlane_b32 s36, v1, 32
-; GCN-NEXT: v_readlane_b32 s37, v1, 33
-; GCN-NEXT: v_readlane_b32 s38, v1, 34
-; GCN-NEXT: v_readlane_b32 s39, v1, 35
-; GCN-NEXT: v_readlane_b32 s40, v1, 36
-; GCN-NEXT: v_readlane_b32 s41, v1, 37
-; GCN-NEXT: v_readlane_b32 s42, v1, 38
-; GCN-NEXT: v_readlane_b32 s43, v1, 39
-; GCN-NEXT: v_readlane_b32 s44, v1, 24
-; GCN-NEXT: v_readlane_b32 s45, v1, 25
-; GCN-NEXT: v_readlane_b32 s46, v1, 26
-; GCN-NEXT: v_readlane_b32 s47, v1, 27
-; GCN-NEXT: v_readlane_b32 s48, v1, 28
-; GCN-NEXT: v_readlane_b32 s49, v1, 29
-; GCN-NEXT: v_readlane_b32 s50, v1, 30
-; GCN-NEXT: v_readlane_b32 s51, v1, 31
-; GCN-NEXT: v_readlane_b32 s52, v1, 16
-; GCN-NEXT: v_readlane_b32 s53, v1, 17
-; GCN-NEXT: v_readlane_b32 s54, v1, 18
-; GCN-NEXT: v_readlane_b32 s55, v1, 19
-; GCN-NEXT: v_readlane_b32 s56, v1, 20
-; GCN-NEXT: v_readlane_b32 s57, v1, 21
-; GCN-NEXT: v_readlane_b32 s58, v1, 22
-; GCN-NEXT: v_readlane_b32 s59, v1, 23
-; GCN-NEXT: v_readlane_b32 s60, v1, 8
-; GCN-NEXT: v_readlane_b32 s61, v1, 9
-; GCN-NEXT: v_readlane_b32 s62, v1, 10
-; GCN-NEXT: v_readlane_b32 s63, v1, 11
-; GCN-NEXT: v_readlane_b32 s64, v1, 12
-; GCN-NEXT: v_readlane_b32 s65, v1, 13
-; GCN-NEXT: v_readlane_b32 s66, v1, 14
-; GCN-NEXT: v_readlane_b32 s67, v1, 15
-; GCN-NEXT: v_readlane_b32 s68, v1, 0
-; GCN-NEXT: v_readlane_b32 s69, v1, 1
-; GCN-NEXT: v_readlane_b32 s70, v1, 2
-; GCN-NEXT: v_readlane_b32 s71, v1, 3
-; GCN-NEXT: v_readlane_b32 s72, v1, 4
-; GCN-NEXT: v_readlane_b32 s73, v1, 5
-; GCN-NEXT: v_readlane_b32 s74, v1, 6
-; GCN-NEXT: v_readlane_b32 s75, v1, 7
-; GCN-NEXT: v_readlane_b32 s76, v0, 56
-; GCN-NEXT: v_readlane_b32 s77, v0, 57
-; GCN-NEXT: v_readlane_b32 s78, v0, 58
-; GCN-NEXT: v_readlane_b32 s79, v0, 59
-; GCN-NEXT: v_readlane_b32 s80, v0, 60
-; GCN-NEXT: v_readlane_b32 s81, v0, 61
-; GCN-NEXT: v_readlane_b32 s82, v0, 62
-; GCN-NEXT: v_readlane_b32 s83, v0, 63
-; GCN-NEXT: v_readlane_b32 s84, v0, 48
-; GCN-NEXT: v_readlane_b32 s85, v0, 49
-; GCN-NEXT: v_readlane_b32 s86, v0, 50
-; GCN-NEXT: v_readlane_b32 s87, v0, 51
-; GCN-NEXT: v_readlane_b32 s88, v0, 52
-; GCN-NEXT: v_readlane_b32 s89, v0, 53
-; GCN-NEXT: v_readlane_b32 s90, v0, 54
-; GCN-NEXT: v_readlane_b32 s91, v0, 55
-; GCN-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-NEXT: v_readlane_b32 s2, v0, 2
-; GCN-NEXT: v_readlane_b32 s3, v0, 3
-; GCN-NEXT: v_readlane_b32 s4, v0, 4
-; GCN-NEXT: v_readlane_b32 s5, v0, 5
-; GCN-NEXT: v_readlane_b32 s6, v0, 6
-; GCN-NEXT: v_readlane_b32 s7, v0, 7
+; GCN-NEXT: v_readlane_b32 s0, v0, 1
+; GCN-NEXT: v_readlane_b32 s1, v0, 2
+; GCN-NEXT: v_readlane_b32 s2, v0, 3
+; GCN-NEXT: v_readlane_b32 s3, v0, 4
+; GCN-NEXT: v_readlane_b32 s4, v0, 5
+; GCN-NEXT: v_readlane_b32 s5, v0, 6
+; GCN-NEXT: v_readlane_b32 s6, v0, 7
+; GCN-NEXT: v_readlane_b32 s7, v0, 8
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 8
-; GCN-NEXT: v_readlane_b32 s1, v0, 9
-; GCN-NEXT: v_readlane_b32 s2, v0, 10
-; GCN-NEXT: v_readlane_b32 s3, v0, 11
-; GCN-NEXT: v_readlane_b32 s4, v0, 12
-; GCN-NEXT: v_readlane_b32 s5, v0, 13
-; GCN-NEXT: v_readlane_b32 s6, v0, 14
-; GCN-NEXT: v_readlane_b32 s7, v0, 15
+; GCN-NEXT: v_readlane_b32 s0, v0, 57
+; GCN-NEXT: v_readlane_b32 s1, v0, 58
+; GCN-NEXT: v_readlane_b32 s2, v0, 59
+; GCN-NEXT: v_readlane_b32 s3, v0, 60
+; GCN-NEXT: v_readlane_b32 s4, v0, 61
+; GCN-NEXT: v_readlane_b32 s5, v0, 62
+; GCN-NEXT: v_readlane_b32 s6, v0, 63
+; GCN-NEXT: v_readlane_b32 s7, v1, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 16
-; GCN-NEXT: v_readlane_b32 s1, v0, 17
-; GCN-NEXT: v_readlane_b32 s2, v0, 18
-; GCN-NEXT: v_readlane_b32 s3, v0, 19
-; GCN-NEXT: v_readlane_b32 s4, v0, 20
-; GCN-NEXT: v_readlane_b32 s5, v0, 21
-; GCN-NEXT: v_readlane_b32 s6, v0, 22
-; GCN-NEXT: v_readlane_b32 s7, v0, 23
+; GCN-NEXT: v_readlane_b32 s0, v1, 1
+; GCN-NEXT: v_readlane_b32 s1, v1, 2
+; GCN-NEXT: v_readlane_b32 s2, v1, 3
+; GCN-NEXT: v_readlane_b32 s3, v1, 4
+; GCN-NEXT: v_readlane_b32 s4, v1, 5
+; GCN-NEXT: v_readlane_b32 s5, v1, 6
+; GCN-NEXT: v_readlane_b32 s6, v1, 7
+; GCN-NEXT: v_readlane_b32 s7, v1, 8
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 24
-; GCN-NEXT: v_readlane_b32 s1, v0, 25
-; GCN-NEXT: v_readlane_b32 s2, v0, 26
-; GCN-NEXT: v_readlane_b32 s3, v0, 27
-; GCN-NEXT: v_readlane_b32 s4, v0, 28
-; GCN-NEXT: v_readlane_b32 s5, v0, 29
-; GCN-NEXT: v_readlane_b32 s6, v0, 30
-; GCN-NEXT: v_readlane_b32 s7, v0, 31
+; GCN-NEXT: v_readlane_b32 s0, v1, 9
+; GCN-NEXT: v_readlane_b32 s1, v1, 10
+; GCN-NEXT: v_readlane_b32 s2, v1, 11
+; GCN-NEXT: v_readlane_b32 s3, v1, 12
+; GCN-NEXT: v_readlane_b32 s4, v1, 13
+; GCN-NEXT: v_readlane_b32 s5, v1, 14
+; GCN-NEXT: v_readlane_b32 s6, v1, 15
+; GCN-NEXT: v_readlane_b32 s7, v1, 16
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 32
-; GCN-NEXT: v_readlane_b32 s1, v0, 33
-; GCN-NEXT: v_readlane_b32 s2, v0, 34
-; GCN-NEXT: v_readlane_b32 s3, v0, 35
-; GCN-NEXT: v_readlane_b32 s4, v0, 36
-; GCN-NEXT: v_readlane_b32 s5, v0, 37
-; GCN-NEXT: v_readlane_b32 s6, v0, 38
-; GCN-NEXT: v_readlane_b32 s7, v0, 39
+; GCN-NEXT: v_readlane_b32 s0, v1, 17
+; GCN-NEXT: v_readlane_b32 s1, v1, 18
+; GCN-NEXT: v_readlane_b32 s2, v1, 19
+; GCN-NEXT: v_readlane_b32 s3, v1, 20
+; GCN-NEXT: v_readlane_b32 s4, v1, 21
+; GCN-NEXT: v_readlane_b32 s5, v1, 22
+; GCN-NEXT: v_readlane_b32 s6, v1, 23
+; GCN-NEXT: v_readlane_b32 s7, v1, 24
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 40
-; GCN-NEXT: v_readlane_b32 s1, v0, 41
-; GCN-NEXT: v_readlane_b32 s2, v0, 42
-; GCN-NEXT: v_readlane_b32 s3, v0, 43
-; GCN-NEXT: v_readlane_b32 s4, v0, 44
-; GCN-NEXT: v_readlane_b32 s5, v0, 45
-; GCN-NEXT: v_readlane_b32 s6, v0, 46
-; GCN-NEXT: v_readlane_b32 s7, v0, 47
+; GCN-NEXT: v_readlane_b32 s0, v1, 25
+; GCN-NEXT: v_readlane_b32 s1, v1, 26
+; GCN-NEXT: v_readlane_b32 s2, v1, 27
+; GCN-NEXT: v_readlane_b32 s3, v1, 28
+; GCN-NEXT: v_readlane_b32 s4, v1, 29
+; GCN-NEXT: v_readlane_b32 s5, v1, 30
+; GCN-NEXT: v_readlane_b32 s6, v1, 31
+; GCN-NEXT: v_readlane_b32 s7, v1, 32
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v2, 0
-; GCN-NEXT: v_readlane_b32 s1, v2, 1
-; GCN-NEXT: v_readlane_b32 s2, v2, 2
-; GCN-NEXT: v_readlane_b32 s3, v2, 3
-; GCN-NEXT: v_readlane_b32 s4, v2, 4
-; GCN-NEXT: v_readlane_b32 s5, v2, 5
-; GCN-NEXT: v_readlane_b32 s6, v2, 6
-; GCN-NEXT: v_readlane_b32 s7, v2, 7
+; GCN-NEXT: v_readlane_b32 s0, v1, 33
+; GCN-NEXT: v_readlane_b32 s1, v1, 34
+; GCN-NEXT: v_readlane_b32 s2, v1, 35
+; GCN-NEXT: v_readlane_b32 s3, v1, 36
+; GCN-NEXT: v_readlane_b32 s4, v1, 37
+; GCN-NEXT: v_readlane_b32 s5, v1, 38
+; GCN-NEXT: v_readlane_b32 s6, v1, 39
+; GCN-NEXT: v_readlane_b32 s7, v1, 40
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[84:91]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v1, 41
+; GCN-NEXT: v_readlane_b32 s1, v1, 42
+; GCN-NEXT: v_readlane_b32 s2, v1, 43
+; GCN-NEXT: v_readlane_b32 s3, v1, 44
+; GCN-NEXT: v_readlane_b32 s4, v1, 45
+; GCN-NEXT: v_readlane_b32 s5, v1, 46
+; GCN-NEXT: v_readlane_b32 s6, v1, 47
+; GCN-NEXT: v_readlane_b32 s7, v1, 48
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[76:83]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v1, 49
+; GCN-NEXT: v_readlane_b32 s1, v1, 50
+; GCN-NEXT: v_readlane_b32 s2, v1, 51
+; GCN-NEXT: v_readlane_b32 s3, v1, 52
+; GCN-NEXT: v_readlane_b32 s4, v1, 53
+; GCN-NEXT: v_readlane_b32 s5, v1, 54
+; GCN-NEXT: v_readlane_b32 s6, v1, 55
+; GCN-NEXT: v_readlane_b32 s7, v1, 56
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[68:75]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v1, 57
+; GCN-NEXT: v_readlane_b32 s1, v1, 58
+; GCN-NEXT: v_readlane_b32 s2, v1, 59
+; GCN-NEXT: v_readlane_b32 s3, v1, 60
+; GCN-NEXT: v_readlane_b32 s4, v1, 61
+; GCN-NEXT: v_readlane_b32 s5, v1, 62
+; GCN-NEXT: v_readlane_b32 s6, v1, 63
+; GCN-NEXT: v_readlane_b32 s7, v2, 0
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[60:67]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v0, 9
+; GCN-NEXT: v_readlane_b32 s1, v0, 10
+; GCN-NEXT: v_readlane_b32 s2, v0, 11
+; GCN-NEXT: v_readlane_b32 s3, v0, 12
+; GCN-NEXT: v_readlane_b32 s4, v0, 13
+; GCN-NEXT: v_readlane_b32 s5, v0, 14
+; GCN-NEXT: v_readlane_b32 s6, v0, 15
+; GCN-NEXT: v_readlane_b32 s7, v0, 16
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[52:59]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v0, 17
+; GCN-NEXT: v_readlane_b32 s1, v0, 18
+; GCN-NEXT: v_readlane_b32 s2, v0, 19
+; GCN-NEXT: v_readlane_b32 s3, v0, 20
+; GCN-NEXT: v_readlane_b32 s4, v0, 21
+; GCN-NEXT: v_readlane_b32 s5, v0, 22
+; GCN-NEXT: v_readlane_b32 s6, v0, 23
+; GCN-NEXT: v_readlane_b32 s7, v0, 24
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[44:51]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v0, 25
+; GCN-NEXT: v_readlane_b32 s1, v0, 26
+; GCN-NEXT: v_readlane_b32 s2, v0, 27
+; GCN-NEXT: v_readlane_b32 s3, v0, 28
+; GCN-NEXT: v_readlane_b32 s4, v0, 29
+; GCN-NEXT: v_readlane_b32 s5, v0, 30
+; GCN-NEXT: v_readlane_b32 s6, v0, 31
+; GCN-NEXT: v_readlane_b32 s7, v0, 32
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[36:43]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v0, 33
+; GCN-NEXT: v_readlane_b32 s1, v0, 34
+; GCN-NEXT: v_readlane_b32 s2, v0, 35
+; GCN-NEXT: v_readlane_b32 s3, v0, 36
+; GCN-NEXT: v_readlane_b32 s4, v0, 37
+; GCN-NEXT: v_readlane_b32 s5, v0, 38
+; GCN-NEXT: v_readlane_b32 s6, v0, 39
+; GCN-NEXT: v_readlane_b32 s7, v0, 40
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[24:31]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v0, 41
+; GCN-NEXT: v_readlane_b32 s1, v0, 42
+; GCN-NEXT: v_readlane_b32 s2, v0, 43
+; GCN-NEXT: v_readlane_b32 s3, v0, 44
+; GCN-NEXT: v_readlane_b32 s4, v0, 45
+; GCN-NEXT: v_readlane_b32 s5, v0, 46
+; GCN-NEXT: v_readlane_b32 s6, v0, 47
+; GCN-NEXT: v_readlane_b32 s7, v0, 48
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[16:23]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v0, 49
+; GCN-NEXT: v_readlane_b32 s1, v0, 50
+; GCN-NEXT: v_readlane_b32 s2, v0, 51
+; GCN-NEXT: v_readlane_b32 s3, v0, 52
+; GCN-NEXT: v_readlane_b32 s4, v0, 53
+; GCN-NEXT: v_readlane_b32 s5, v0, 54
+; GCN-NEXT: v_readlane_b32 s6, v0, 55
+; GCN-NEXT: v_readlane_b32 s7, v0, 56
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[8:15]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v2, 1
+; GCN-NEXT: v_readlane_b32 s1, v2, 2
+; GCN-NEXT: v_readlane_b32 s2, v2, 3
+; GCN-NEXT: v_readlane_b32 s3, v2, 4
+; GCN-NEXT: v_readlane_b32 s4, v2, 5
+; GCN-NEXT: v_readlane_b32 s5, v2, 6
+; GCN-NEXT: v_readlane_b32 s6, v2, 7
+; GCN-NEXT: v_readlane_b32 s7, v2, 8
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
@@ -446,189 +448,191 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 0
-; GCN-NEXT: v_writelane_b32 v0, s5, 1
-; GCN-NEXT: v_writelane_b32 v0, s6, 2
-; GCN-NEXT: v_writelane_b32 v0, s7, 3
-; GCN-NEXT: v_writelane_b32 v0, s8, 4
-; GCN-NEXT: v_writelane_b32 v0, s9, 5
-; GCN-NEXT: v_writelane_b32 v0, s10, 6
-; GCN-NEXT: v_writelane_b32 v0, s11, 7
-; GCN-NEXT: v_writelane_b32 v0, s12, 8
-; GCN-NEXT: v_writelane_b32 v0, s13, 9
-; GCN-NEXT: v_writelane_b32 v0, s14, 10
-; GCN-NEXT: v_writelane_b32 v0, s15, 11
-; GCN-NEXT: v_writelane_b32 v0, s16, 12
-; GCN-NEXT: v_writelane_b32 v0, s17, 13
-; GCN-NEXT: v_writelane_b32 v0, s18, 14
-; GCN-NEXT: v_writelane_b32 v0, s19, 15
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:19]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 16
-; GCN-NEXT: v_writelane_b32 v0, s5, 17
-; GCN-NEXT: v_writelane_b32 v0, s6, 18
-; GCN-NEXT: v_writelane_b32 v0, s7, 19
-; GCN-NEXT: v_writelane_b32 v0, s8, 20
-; GCN-NEXT: v_writelane_b32 v0, s9, 21
-; GCN-NEXT: v_writelane_b32 v0, s10, 22
-; GCN-NEXT: v_writelane_b32 v0, s11, 23
-; GCN-NEXT: v_writelane_b32 v0, s12, 24
-; GCN-NEXT: v_writelane_b32 v0, s13, 25
-; GCN-NEXT: v_writelane_b32 v0, s14, 26
-; GCN-NEXT: v_writelane_b32 v0, s15, 27
-; GCN-NEXT: v_writelane_b32 v0, s16, 28
-; GCN-NEXT: v_writelane_b32 v0, s17, 29
-; GCN-NEXT: v_writelane_b32 v0, s18, 30
-; GCN-NEXT: v_writelane_b32 v0, s19, 31
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:19]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 32
-; GCN-NEXT: v_writelane_b32 v0, s5, 33
-; GCN-NEXT: v_writelane_b32 v0, s6, 34
-; GCN-NEXT: v_writelane_b32 v0, s7, 35
-; GCN-NEXT: v_writelane_b32 v0, s8, 36
-; GCN-NEXT: v_writelane_b32 v0, s9, 37
-; GCN-NEXT: v_writelane_b32 v0, s10, 38
-; GCN-NEXT: v_writelane_b32 v0, s11, 39
-; GCN-NEXT: v_writelane_b32 v0, s12, 40
-; GCN-NEXT: v_writelane_b32 v0, s13, 41
-; GCN-NEXT: v_writelane_b32 v0, s14, 42
-; GCN-NEXT: v_writelane_b32 v0, s15, 43
-; GCN-NEXT: v_writelane_b32 v0, s16, 44
-; GCN-NEXT: v_writelane_b32 v0, s17, 45
-; GCN-NEXT: v_writelane_b32 v0, s18, 46
-; GCN-NEXT: v_writelane_b32 v0, s19, 47
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:19]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 48
-; GCN-NEXT: v_writelane_b32 v0, s5, 49
-; GCN-NEXT: v_writelane_b32 v0, s6, 50
-; GCN-NEXT: v_writelane_b32 v0, s7, 51
-; GCN-NEXT: v_writelane_b32 v0, s8, 52
-; GCN-NEXT: v_writelane_b32 v0, s9, 53
-; GCN-NEXT: v_writelane_b32 v0, s10, 54
-; GCN-NEXT: v_writelane_b32 v0, s11, 55
-; GCN-NEXT: v_writelane_b32 v0, s12, 56
-; GCN-NEXT: v_writelane_b32 v0, s13, 57
-; GCN-NEXT: v_writelane_b32 v0, s14, 58
-; GCN-NEXT: v_writelane_b32 v0, s15, 59
-; GCN-NEXT: v_writelane_b32 v0, s16, 60
-; GCN-NEXT: v_writelane_b32 v0, s17, 61
-; GCN-NEXT: v_writelane_b32 v0, s18, 62
-; GCN-NEXT: v_writelane_b32 v0, s19, 63
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ; def s[36:51]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 0
-; GCN-NEXT: v_writelane_b32 v1, s5, 1
-; GCN-NEXT: v_writelane_b32 v1, s6, 2
-; GCN-NEXT: v_writelane_b32 v1, s7, 3
-; GCN-NEXT: v_writelane_b32 v1, s8, 4
-; GCN-NEXT: v_writelane_b32 v1, s9, 5
-; GCN-NEXT: v_writelane_b32 v1, s10, 6
-; GCN-NEXT: v_writelane_b32 v1, s11, 7
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[2:3]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s2, 8
-; GCN-NEXT: v_writelane_b32 v1, s3, 9
-; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_cmp_lg_u32 s0, s1
+; GCN-NEXT: v_writelane_b32 v0, s0, 0
+; GCN-NEXT: v_writelane_b32 v0, s4, 1
+; GCN-NEXT: v_writelane_b32 v0, s5, 2
+; GCN-NEXT: v_writelane_b32 v0, s6, 3
+; GCN-NEXT: v_writelane_b32 v0, s7, 4
+; GCN-NEXT: v_writelane_b32 v0, s8, 5
+; GCN-NEXT: v_writelane_b32 v0, s9, 6
+; GCN-NEXT: v_writelane_b32 v0, s10, 7
+; GCN-NEXT: v_writelane_b32 v0, s11, 8
+; GCN-NEXT: v_writelane_b32 v0, s12, 9
+; GCN-NEXT: v_writelane_b32 v0, s13, 10
+; GCN-NEXT: v_writelane_b32 v0, s14, 11
+; GCN-NEXT: v_writelane_b32 v0, s15, 12
+; GCN-NEXT: v_writelane_b32 v0, s16, 13
+; GCN-NEXT: v_writelane_b32 v0, s17, 14
+; GCN-NEXT: v_writelane_b32 v0, s18, 15
+; GCN-NEXT: v_writelane_b32 v0, s19, 16
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:15]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[16:31]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s0, 17
+; GCN-NEXT: v_writelane_b32 v0, s1, 18
+; GCN-NEXT: v_writelane_b32 v0, s2, 19
+; GCN-NEXT: v_writelane_b32 v0, s3, 20
+; GCN-NEXT: v_writelane_b32 v0, s4, 21
+; GCN-NEXT: v_writelane_b32 v0, s5, 22
+; GCN-NEXT: v_writelane_b32 v0, s6, 23
+; GCN-NEXT: v_writelane_b32 v0, s7, 24
+; GCN-NEXT: v_writelane_b32 v0, s8, 25
+; GCN-NEXT: v_writelane_b32 v0, s9, 26
+; GCN-NEXT: v_writelane_b32 v0, s10, 27
+; GCN-NEXT: v_writelane_b32 v0, s11, 28
+; GCN-NEXT: v_writelane_b32 v0, s12, 29
+; GCN-NEXT: v_writelane_b32 v0, s13, 30
+; GCN-NEXT: v_writelane_b32 v0, s14, 31
+; GCN-NEXT: v_writelane_b32 v0, s15, 32
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:7]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[8:9]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: v_readlane_b32 s11, v0, 0
+; GCN-NEXT: s_cmp_lg_u32 s11, s10
+; GCN-NEXT: v_writelane_b32 v0, s36, 33
+; GCN-NEXT: v_writelane_b32 v0, s37, 34
+; GCN-NEXT: v_writelane_b32 v0, s38, 35
+; GCN-NEXT: v_writelane_b32 v0, s39, 36
+; GCN-NEXT: v_writelane_b32 v0, s40, 37
+; GCN-NEXT: v_writelane_b32 v0, s41, 38
+; GCN-NEXT: v_writelane_b32 v0, s42, 39
+; GCN-NEXT: v_writelane_b32 v0, s43, 40
+; GCN-NEXT: v_writelane_b32 v0, s44, 41
+; GCN-NEXT: v_writelane_b32 v0, s45, 42
+; GCN-NEXT: v_writelane_b32 v0, s46, 43
+; GCN-NEXT: v_writelane_b32 v0, s47, 44
+; GCN-NEXT: v_writelane_b32 v0, s48, 45
+; GCN-NEXT: v_writelane_b32 v0, s49, 46
+; GCN-NEXT: v_writelane_b32 v0, s50, 47
+; GCN-NEXT: v_writelane_b32 v0, s51, 48
+; GCN-NEXT: v_writelane_b32 v0, s16, 49
+; GCN-NEXT: v_writelane_b32 v0, s17, 50
+; GCN-NEXT: v_writelane_b32 v0, s18, 51
+; GCN-NEXT: v_writelane_b32 v0, s19, 52
+; GCN-NEXT: v_writelane_b32 v0, s20, 53
+; GCN-NEXT: v_writelane_b32 v0, s21, 54
+; GCN-NEXT: v_writelane_b32 v0, s22, 55
+; GCN-NEXT: v_writelane_b32 v0, s23, 56
+; GCN-NEXT: v_writelane_b32 v0, s24, 57
+; GCN-NEXT: v_writelane_b32 v0, s25, 58
+; GCN-NEXT: v_writelane_b32 v0, s26, 59
+; GCN-NEXT: v_writelane_b32 v0, s27, 60
+; GCN-NEXT: v_writelane_b32 v0, s28, 61
+; GCN-NEXT: v_writelane_b32 v0, s29, 62
+; GCN-NEXT: v_writelane_b32 v0, s30, 63
+; GCN-NEXT: v_writelane_b32 v1, s31, 0
+; GCN-NEXT: v_writelane_b32 v1, s0, 1
+; GCN-NEXT: v_writelane_b32 v1, s1, 2
+; GCN-NEXT: v_writelane_b32 v1, s2, 3
+; GCN-NEXT: v_writelane_b32 v1, s3, 4
+; GCN-NEXT: v_writelane_b32 v1, s4, 5
+; GCN-NEXT: v_writelane_b32 v1, s5, 6
+; GCN-NEXT: v_writelane_b32 v1, s6, 7
+; GCN-NEXT: v_writelane_b32 v1, s7, 8
+; GCN-NEXT: v_writelane_b32 v1, s8, 9
+; GCN-NEXT: v_writelane_b32 v1, s9, 10
; GCN-NEXT: s_cbranch_scc1 BB1_2
; GCN-NEXT: ; %bb.1: ; %bb0
-; GCN-NEXT: v_readlane_b32 s16, v1, 8
-; GCN-NEXT: v_readlane_b32 s17, v1, 9
-; GCN-NEXT: v_readlane_b32 s20, v1, 0
-; GCN-NEXT: v_readlane_b32 s21, v1, 1
-; GCN-NEXT: v_readlane_b32 s22, v1, 2
-; GCN-NEXT: v_readlane_b32 s23, v1, 3
-; GCN-NEXT: v_readlane_b32 s24, v1, 4
-; GCN-NEXT: v_readlane_b32 s25, v1, 5
-; GCN-NEXT: v_readlane_b32 s26, v1, 6
-; GCN-NEXT: v_readlane_b32 s27, v1, 7
-; GCN-NEXT: v_readlane_b32 s36, v0, 32
-; GCN-NEXT: v_readlane_b32 s37, v0, 33
-; GCN-NEXT: v_readlane_b32 s38, v0, 34
-; GCN-NEXT: v_readlane_b32 s39, v0, 35
-; GCN-NEXT: v_readlane_b32 s40, v0, 36
-; GCN-NEXT: v_readlane_b32 s41, v0, 37
-; GCN-NEXT: v_readlane_b32 s42, v0, 38
-; GCN-NEXT: v_readlane_b32 s43, v0, 39
-; GCN-NEXT: v_readlane_b32 s44, v0, 40
-; GCN-NEXT: v_readlane_b32 s45, v0, 41
-; GCN-NEXT: v_readlane_b32 s46, v0, 42
-; GCN-NEXT: v_readlane_b32 s47, v0, 43
-; GCN-NEXT: v_readlane_b32 s48, v0, 44
-; GCN-NEXT: v_readlane_b32 s49, v0, 45
-; GCN-NEXT: v_readlane_b32 s50, v0, 46
-; GCN-NEXT: v_readlane_b32 s51, v0, 47
-; GCN-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-NEXT: v_readlane_b32 s2, v0, 2
-; GCN-NEXT: v_readlane_b32 s3, v0, 3
-; GCN-NEXT: v_readlane_b32 s4, v0, 4
-; GCN-NEXT: v_readlane_b32 s5, v0, 5
-; GCN-NEXT: v_readlane_b32 s6, v0, 6
-; GCN-NEXT: v_readlane_b32 s7, v0, 7
-; GCN-NEXT: v_readlane_b32 s8, v0, 8
-; GCN-NEXT: v_readlane_b32 s9, v0, 9
-; GCN-NEXT: v_readlane_b32 s10, v0, 10
-; GCN-NEXT: v_readlane_b32 s11, v0, 11
-; GCN-NEXT: v_readlane_b32 s12, v0, 12
-; GCN-NEXT: v_readlane_b32 s13, v0, 13
-; GCN-NEXT: v_readlane_b32 s14, v0, 14
-; GCN-NEXT: v_readlane_b32 s15, v0, 15
+; GCN-NEXT: v_readlane_b32 s0, v0, 1
+; GCN-NEXT: v_readlane_b32 s1, v0, 2
+; GCN-NEXT: v_readlane_b32 s2, v0, 3
+; GCN-NEXT: v_readlane_b32 s3, v0, 4
+; GCN-NEXT: v_readlane_b32 s4, v0, 5
+; GCN-NEXT: v_readlane_b32 s5, v0, 6
+; GCN-NEXT: v_readlane_b32 s6, v0, 7
+; GCN-NEXT: v_readlane_b32 s7, v0, 8
+; GCN-NEXT: v_readlane_b32 s8, v0, 9
+; GCN-NEXT: v_readlane_b32 s9, v0, 10
+; GCN-NEXT: v_readlane_b32 s10, v0, 11
+; GCN-NEXT: v_readlane_b32 s11, v0, 12
+; GCN-NEXT: v_readlane_b32 s12, v0, 13
+; GCN-NEXT: v_readlane_b32 s13, v0, 14
+; GCN-NEXT: v_readlane_b32 s14, v0, 15
+; GCN-NEXT: v_readlane_b32 s15, v0, 16
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 16
-; GCN-NEXT: v_readlane_b32 s1, v0, 17
-; GCN-NEXT: v_readlane_b32 s2, v0, 18
-; GCN-NEXT: v_readlane_b32 s3, v0, 19
-; GCN-NEXT: v_readlane_b32 s4, v0, 20
-; GCN-NEXT: v_readlane_b32 s5, v0, 21
-; GCN-NEXT: v_readlane_b32 s6, v0, 22
-; GCN-NEXT: v_readlane_b32 s7, v0, 23
-; GCN-NEXT: v_readlane_b32 s8, v0, 24
-; GCN-NEXT: v_readlane_b32 s9, v0, 25
-; GCN-NEXT: v_readlane_b32 s10, v0, 26
-; GCN-NEXT: v_readlane_b32 s11, v0, 27
-; GCN-NEXT: v_readlane_b32 s12, v0, 28
-; GCN-NEXT: v_readlane_b32 s13, v0, 29
-; GCN-NEXT: v_readlane_b32 s14, v0, 30
-; GCN-NEXT: v_readlane_b32 s15, v0, 31
+; GCN-NEXT: v_readlane_b32 s0, v0, 33
+; GCN-NEXT: v_readlane_b32 s1, v0, 34
+; GCN-NEXT: v_readlane_b32 s2, v0, 35
+; GCN-NEXT: v_readlane_b32 s3, v0, 36
+; GCN-NEXT: v_readlane_b32 s4, v0, 37
+; GCN-NEXT: v_readlane_b32 s5, v0, 38
+; GCN-NEXT: v_readlane_b32 s6, v0, 39
+; GCN-NEXT: v_readlane_b32 s7, v0, 40
+; GCN-NEXT: v_readlane_b32 s8, v0, 41
+; GCN-NEXT: v_readlane_b32 s9, v0, 42
+; GCN-NEXT: v_readlane_b32 s10, v0, 43
+; GCN-NEXT: v_readlane_b32 s11, v0, 44
+; GCN-NEXT: v_readlane_b32 s12, v0, 45
+; GCN-NEXT: v_readlane_b32 s13, v0, 46
+; GCN-NEXT: v_readlane_b32 s14, v0, 47
+; GCN-NEXT: v_readlane_b32 s15, v0, 48
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 48
-; GCN-NEXT: v_readlane_b32 s1, v0, 49
-; GCN-NEXT: v_readlane_b32 s2, v0, 50
-; GCN-NEXT: v_readlane_b32 s3, v0, 51
-; GCN-NEXT: v_readlane_b32 s4, v0, 52
-; GCN-NEXT: v_readlane_b32 s5, v0, 53
-; GCN-NEXT: v_readlane_b32 s6, v0, 54
-; GCN-NEXT: v_readlane_b32 s7, v0, 55
-; GCN-NEXT: v_readlane_b32 s8, v0, 56
-; GCN-NEXT: v_readlane_b32 s9, v0, 57
-; GCN-NEXT: v_readlane_b32 s10, v0, 58
-; GCN-NEXT: v_readlane_b32 s11, v0, 59
-; GCN-NEXT: v_readlane_b32 s12, v0, 60
-; GCN-NEXT: v_readlane_b32 s13, v0, 61
-; GCN-NEXT: v_readlane_b32 s14, v0, 62
-; GCN-NEXT: v_readlane_b32 s15, v0, 63
+; GCN-NEXT: v_readlane_b32 s0, v0, 17
+; GCN-NEXT: v_readlane_b32 s1, v0, 18
+; GCN-NEXT: v_readlane_b32 s2, v0, 19
+; GCN-NEXT: v_readlane_b32 s3, v0, 20
+; GCN-NEXT: v_readlane_b32 s4, v0, 21
+; GCN-NEXT: v_readlane_b32 s5, v0, 22
+; GCN-NEXT: v_readlane_b32 s6, v0, 23
+; GCN-NEXT: v_readlane_b32 s7, v0, 24
+; GCN-NEXT: v_readlane_b32 s8, v0, 25
+; GCN-NEXT: v_readlane_b32 s9, v0, 26
+; GCN-NEXT: v_readlane_b32 s10, v0, 27
+; GCN-NEXT: v_readlane_b32 s11, v0, 28
+; GCN-NEXT: v_readlane_b32 s12, v0, 29
+; GCN-NEXT: v_readlane_b32 s13, v0, 30
+; GCN-NEXT: v_readlane_b32 s14, v0, 31
+; GCN-NEXT: v_readlane_b32 s15, v0, 32
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[36:51]
+; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v1, 1
+; GCN-NEXT: v_readlane_b32 s1, v1, 2
+; GCN-NEXT: v_readlane_b32 s2, v1, 3
+; GCN-NEXT: v_readlane_b32 s3, v1, 4
+; GCN-NEXT: v_readlane_b32 s4, v1, 5
+; GCN-NEXT: v_readlane_b32 s5, v1, 6
+; GCN-NEXT: v_readlane_b32 s6, v1, 7
+; GCN-NEXT: v_readlane_b32 s7, v1, 8
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[20:27]
+; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v1, 9
+; GCN-NEXT: v_readlane_b32 s1, v1, 10
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[16:17]
+; GCN-NEXT: ; use s[0:1]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v0, 49
+; GCN-NEXT: v_readlane_b32 s1, v0, 50
+; GCN-NEXT: v_readlane_b32 s2, v0, 51
+; GCN-NEXT: v_readlane_b32 s3, v0, 52
+; GCN-NEXT: v_readlane_b32 s4, v0, 53
+; GCN-NEXT: v_readlane_b32 s5, v0, 54
+; GCN-NEXT: v_readlane_b32 s6, v0, 55
+; GCN-NEXT: v_readlane_b32 s7, v0, 56
+; GCN-NEXT: v_readlane_b32 s8, v0, 57
+; GCN-NEXT: v_readlane_b32 s9, v0, 58
+; GCN-NEXT: v_readlane_b32 s10, v0, 59
+; GCN-NEXT: v_readlane_b32 s11, v0, 60
+; GCN-NEXT: v_readlane_b32 s12, v0, 61
+; GCN-NEXT: v_readlane_b32 s13, v0, 62
+; GCN-NEXT: v_readlane_b32 s14, v0, 63
+; GCN-NEXT: v_readlane_b32 s15, v1, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
@@ -663,12 +667,12 @@ ret:
define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 {
; GCN-LABEL: no_vgprs_last_sgpr_spill:
; GCN: ; %bb.0:
-; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GCN-NEXT: s_mov_b32 s54, -1
-; GCN-NEXT: s_mov_b32 s55, 0xe8f000
-; GCN-NEXT: s_add_u32 s52, s52, s3
-; GCN-NEXT: s_addc_u32 s53, s53, 0
+; GCN-NEXT: s_mov_b32 s56, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s57, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s58, -1
+; GCN-NEXT: s_mov_b32 s59, 0xe8f000
+; GCN-NEXT: s_add_u32 s56, s56, s3
+; GCN-NEXT: s_addc_u32 s57, s57, 0
; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
@@ -685,176 +689,180 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v31, s4, 0
-; GCN-NEXT: v_writelane_b32 v31, s5, 1
-; GCN-NEXT: v_writelane_b32 v31, s6, 2
-; GCN-NEXT: v_writelane_b32 v31, s7, 3
-; GCN-NEXT: v_writelane_b32 v31, s8, 4
-; GCN-NEXT: v_writelane_b32 v31, s9, 5
-; GCN-NEXT: v_writelane_b32 v31, s10, 6
-; GCN-NEXT: v_writelane_b32 v31, s11, 7
-; GCN-NEXT: v_writelane_b32 v31, s12, 8
-; GCN-NEXT: v_writelane_b32 v31, s13, 9
-; GCN-NEXT: v_writelane_b32 v31, s14, 10
-; GCN-NEXT: v_writelane_b32 v31, s15, 11
-; GCN-NEXT: v_writelane_b32 v31, s16, 12
-; GCN-NEXT: v_writelane_b32 v31, s17, 13
-; GCN-NEXT: v_writelane_b32 v31, s18, 14
-; GCN-NEXT: v_writelane_b32 v31, s19, 15
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:19]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v31, s4, 16
-; GCN-NEXT: v_writelane_b32 v31, s5, 17
-; GCN-NEXT: v_writelane_b32 v31, s6, 18
-; GCN-NEXT: v_writelane_b32 v31, s7, 19
-; GCN-NEXT: v_writelane_b32 v31, s8, 20
-; GCN-NEXT: v_writelane_b32 v31, s9, 21
-; GCN-NEXT: v_writelane_b32 v31, s10, 22
-; GCN-NEXT: v_writelane_b32 v31, s11, 23
-; GCN-NEXT: v_writelane_b32 v31, s12, 24
-; GCN-NEXT: v_writelane_b32 v31, s13, 25
-; GCN-NEXT: v_writelane_b32 v31, s14, 26
-; GCN-NEXT: v_writelane_b32 v31, s15, 27
-; GCN-NEXT: v_writelane_b32 v31, s16, 28
-; GCN-NEXT: v_writelane_b32 v31, s17, 29
-; GCN-NEXT: v_writelane_b32 v31, s18, 30
-; GCN-NEXT: v_writelane_b32 v31, s19, 31
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ; def s[36:51]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v31, s4, 32
-; GCN-NEXT: v_writelane_b32 v31, s5, 33
-; GCN-NEXT: v_writelane_b32 v31, s6, 34
-; GCN-NEXT: v_writelane_b32 v31, s7, 35
-; GCN-NEXT: v_writelane_b32 v31, s8, 36
-; GCN-NEXT: v_writelane_b32 v31, s9, 37
-; GCN-NEXT: v_writelane_b32 v31, s10, 38
-; GCN-NEXT: v_writelane_b32 v31, s11, 39
-; GCN-NEXT: v_writelane_b32 v31, s12, 40
-; GCN-NEXT: v_writelane_b32 v31, s13, 41
-; GCN-NEXT: v_writelane_b32 v31, s14, 42
-; GCN-NEXT: v_writelane_b32 v31, s15, 43
-; GCN-NEXT: v_writelane_b32 v31, s16, 44
-; GCN-NEXT: v_writelane_b32 v31, s17, 45
-; GCN-NEXT: v_writelane_b32 v31, s18, 46
-; GCN-NEXT: v_writelane_b32 v31, s19, 47
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[4:19]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v31, s4, 48
-; GCN-NEXT: v_writelane_b32 v31, s5, 49
-; GCN-NEXT: v_writelane_b32 v31, s6, 50
-; GCN-NEXT: v_writelane_b32 v31, s7, 51
-; GCN-NEXT: v_writelane_b32 v31, s8, 52
-; GCN-NEXT: v_writelane_b32 v31, s9, 53
-; GCN-NEXT: v_writelane_b32 v31, s10, 54
-; GCN-NEXT: v_writelane_b32 v31, s11, 55
-; GCN-NEXT: v_writelane_b32 v31, s12, 56
-; GCN-NEXT: v_writelane_b32 v31, s13, 57
-; GCN-NEXT: v_writelane_b32 v31, s14, 58
-; GCN-NEXT: v_writelane_b32 v31, s15, 59
-; GCN-NEXT: v_writelane_b32 v31, s16, 60
-; GCN-NEXT: v_writelane_b32 v31, s17, 61
-; GCN-NEXT: v_writelane_b32 v31, s18, 62
-; GCN-NEXT: v_writelane_b32 v31, s19, 63
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[2:3]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s2, 0
-; GCN-NEXT: v_writelane_b32 v0, s3, 1
-; GCN-NEXT: s_mov_b64 s[2:3], exec
-; GCN-NEXT: s_mov_b64 exec, 3
-; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, s[2:3]
-; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_cmp_lg_u32 s0, s1
+; GCN-NEXT: v_writelane_b32 v31, s0, 0
+; GCN-NEXT: v_writelane_b32 v31, s4, 1
+; GCN-NEXT: v_writelane_b32 v31, s5, 2
+; GCN-NEXT: v_writelane_b32 v31, s6, 3
+; GCN-NEXT: v_writelane_b32 v31, s7, 4
+; GCN-NEXT: v_writelane_b32 v31, s8, 5
+; GCN-NEXT: v_writelane_b32 v31, s9, 6
+; GCN-NEXT: v_writelane_b32 v31, s10, 7
+; GCN-NEXT: v_writelane_b32 v31, s11, 8
+; GCN-NEXT: v_writelane_b32 v31, s12, 9
+; GCN-NEXT: v_writelane_b32 v31, s13, 10
+; GCN-NEXT: v_writelane_b32 v31, s14, 11
+; GCN-NEXT: v_writelane_b32 v31, s15, 12
+; GCN-NEXT: v_writelane_b32 v31, s16, 13
+; GCN-NEXT: v_writelane_b32 v31, s17, 14
+; GCN-NEXT: v_writelane_b32 v31, s18, 15
+; GCN-NEXT: v_writelane_b32 v31, s19, 16
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:15]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[16:31]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[34:35]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s33, 0
+; GCN-NEXT: v_readlane_b32 s52, v31, 0
+; GCN-NEXT: s_cmp_lg_u32 s52, s33
+; GCN-NEXT: v_writelane_b32 v31, s36, 17
+; GCN-NEXT: v_writelane_b32 v31, s37, 18
+; GCN-NEXT: v_writelane_b32 v31, s38, 19
+; GCN-NEXT: v_writelane_b32 v31, s39, 20
+; GCN-NEXT: v_writelane_b32 v31, s40, 21
+; GCN-NEXT: v_writelane_b32 v31, s41, 22
+; GCN-NEXT: v_writelane_b32 v31, s42, 23
+; GCN-NEXT: v_writelane_b32 v31, s43, 24
+; GCN-NEXT: v_writelane_b32 v31, s44, 25
+; GCN-NEXT: v_writelane_b32 v31, s45, 26
+; GCN-NEXT: v_writelane_b32 v31, s46, 27
+; GCN-NEXT: v_writelane_b32 v31, s47, 28
+; GCN-NEXT: v_writelane_b32 v31, s48, 29
+; GCN-NEXT: v_writelane_b32 v31, s49, 30
+; GCN-NEXT: v_writelane_b32 v31, s50, 31
+; GCN-NEXT: v_writelane_b32 v31, s51, 32
+; GCN-NEXT: v_writelane_b32 v31, s0, 33
+; GCN-NEXT: v_writelane_b32 v31, s1, 34
+; GCN-NEXT: v_writelane_b32 v31, s2, 35
+; GCN-NEXT: v_writelane_b32 v31, s3, 36
+; GCN-NEXT: v_writelane_b32 v31, s4, 37
+; GCN-NEXT: v_writelane_b32 v31, s5, 38
+; GCN-NEXT: v_writelane_b32 v31, s6, 39
+; GCN-NEXT: v_writelane_b32 v31, s7, 40
+; GCN-NEXT: v_writelane_b32 v31, s8, 41
+; GCN-NEXT: v_writelane_b32 v31, s9, 42
+; GCN-NEXT: v_writelane_b32 v31, s10, 43
+; GCN-NEXT: v_writelane_b32 v31, s11, 44
+; GCN-NEXT: v_writelane_b32 v31, s12, 45
+; GCN-NEXT: v_writelane_b32 v31, s13, 46
+; GCN-NEXT: v_writelane_b32 v31, s14, 47
+; GCN-NEXT: v_writelane_b32 v31, s15, 48
+; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0
+; GCN-NEXT: v_writelane_b32 v0, s16, 0
+; GCN-NEXT: v_writelane_b32 v0, s17, 1
+; GCN-NEXT: v_writelane_b32 v0, s18, 2
+; GCN-NEXT: v_writelane_b32 v0, s19, 3
+; GCN-NEXT: v_writelane_b32 v0, s20, 4
+; GCN-NEXT: v_writelane_b32 v0, s21, 5
+; GCN-NEXT: v_writelane_b32 v0, s22, 6
+; GCN-NEXT: v_writelane_b32 v0, s23, 7
+; GCN-NEXT: v_writelane_b32 v0, s24, 8
+; GCN-NEXT: v_writelane_b32 v0, s25, 9
+; GCN-NEXT: v_writelane_b32 v0, s26, 10
+; GCN-NEXT: v_writelane_b32 v0, s27, 11
+; GCN-NEXT: v_writelane_b32 v0, s28, 12
+; GCN-NEXT: v_writelane_b32 v0, s29, 13
+; GCN-NEXT: v_writelane_b32 v0, s30, 14
+; GCN-NEXT: v_writelane_b32 v0, s31, 15
+; GCN-NEXT: s_mov_b64 s[16:17], exec
+; GCN-NEXT: s_mov_b64 exec, 0xffff
+; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[16:17]
+; GCN-NEXT: v_writelane_b32 v31, s34, 49
+; GCN-NEXT: v_writelane_b32 v31, s35, 50
+; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0
; GCN-NEXT: s_cbranch_scc1 BB2_2
; GCN-NEXT: ; %bb.1: ; %bb0
-; GCN-NEXT: v_readlane_b32 s36, v31, 32
-; GCN-NEXT: v_readlane_b32 s37, v31, 33
-; GCN-NEXT: v_readlane_b32 s38, v31, 34
-; GCN-NEXT: v_readlane_b32 s39, v31, 35
-; GCN-NEXT: v_readlane_b32 s40, v31, 36
-; GCN-NEXT: v_readlane_b32 s41, v31, 37
-; GCN-NEXT: v_readlane_b32 s42, v31, 38
-; GCN-NEXT: v_readlane_b32 s43, v31, 39
-; GCN-NEXT: v_readlane_b32 s44, v31, 40
-; GCN-NEXT: v_readlane_b32 s45, v31, 41
-; GCN-NEXT: v_readlane_b32 s46, v31, 42
-; GCN-NEXT: v_readlane_b32 s47, v31, 43
-; GCN-NEXT: v_readlane_b32 s48, v31, 44
-; GCN-NEXT: v_readlane_b32 s49, v31, 45
-; GCN-NEXT: v_readlane_b32 s50, v31, 46
-; GCN-NEXT: v_readlane_b32 s51, v31, 47
-; GCN-NEXT: v_readlane_b32 s0, v31, 16
-; GCN-NEXT: v_readlane_b32 s1, v31, 17
-; GCN-NEXT: v_readlane_b32 s2, v31, 18
-; GCN-NEXT: v_readlane_b32 s3, v31, 19
-; GCN-NEXT: v_readlane_b32 s4, v31, 20
-; GCN-NEXT: v_readlane_b32 s5, v31, 21
-; GCN-NEXT: v_readlane_b32 s6, v31, 22
-; GCN-NEXT: v_readlane_b32 s7, v31, 23
-; GCN-NEXT: v_readlane_b32 s8, v31, 24
-; GCN-NEXT: v_readlane_b32 s9, v31, 25
-; GCN-NEXT: v_readlane_b32 s10, v31, 26
-; GCN-NEXT: v_readlane_b32 s11, v31, 27
-; GCN-NEXT: v_readlane_b32 s12, v31, 28
-; GCN-NEXT: v_readlane_b32 s13, v31, 29
-; GCN-NEXT: v_readlane_b32 s14, v31, 30
-; GCN-NEXT: v_readlane_b32 s15, v31, 31
-; GCN-NEXT: v_readlane_b32 s16, v31, 0
-; GCN-NEXT: v_readlane_b32 s17, v31, 1
-; GCN-NEXT: v_readlane_b32 s18, v31, 2
-; GCN-NEXT: v_readlane_b32 s19, v31, 3
-; GCN-NEXT: v_readlane_b32 s20, v31, 4
-; GCN-NEXT: v_readlane_b32 s21, v31, 5
-; GCN-NEXT: v_readlane_b32 s22, v31, 6
-; GCN-NEXT: v_readlane_b32 s23, v31, 7
-; GCN-NEXT: v_readlane_b32 s24, v31, 8
-; GCN-NEXT: v_readlane_b32 s25, v31, 9
-; GCN-NEXT: v_readlane_b32 s26, v31, 10
-; GCN-NEXT: v_readlane_b32 s27, v31, 11
-; GCN-NEXT: v_readlane_b32 s28, v31, 12
-; GCN-NEXT: v_readlane_b32 s29, v31, 13
-; GCN-NEXT: v_readlane_b32 s30, v31, 14
-; GCN-NEXT: v_readlane_b32 s31, v31, 15
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[16:31]
+; GCN-NEXT: v_readlane_b32 s0, v31, 1
+; GCN-NEXT: v_readlane_b32 s1, v31, 2
+; GCN-NEXT: v_readlane_b32 s2, v31, 3
+; GCN-NEXT: v_readlane_b32 s3, v31, 4
+; GCN-NEXT: v_readlane_b32 s4, v31, 5
+; GCN-NEXT: v_readlane_b32 s5, v31, 6
+; GCN-NEXT: v_readlane_b32 s6, v31, 7
+; GCN-NEXT: v_readlane_b32 s7, v31, 8
+; GCN-NEXT: v_readlane_b32 s8, v31, 9
+; GCN-NEXT: v_readlane_b32 s9, v31, 10
+; GCN-NEXT: v_readlane_b32 s10, v31, 11
+; GCN-NEXT: v_readlane_b32 s11, v31, 12
+; GCN-NEXT: v_readlane_b32 s12, v31, 13
+; GCN-NEXT: v_readlane_b32 s13, v31, 14
+; GCN-NEXT: v_readlane_b32 s14, v31, 15
+; GCN-NEXT: v_readlane_b32 s15, v31, 16
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v31, 17
+; GCN-NEXT: v_readlane_b32 s1, v31, 18
+; GCN-NEXT: v_readlane_b32 s2, v31, 19
+; GCN-NEXT: v_readlane_b32 s3, v31, 20
+; GCN-NEXT: v_readlane_b32 s4, v31, 21
+; GCN-NEXT: v_readlane_b32 s5, v31, 22
+; GCN-NEXT: v_readlane_b32 s6, v31, 23
+; GCN-NEXT: v_readlane_b32 s7, v31, 24
+; GCN-NEXT: v_readlane_b32 s8, v31, 25
+; GCN-NEXT: v_readlane_b32 s9, v31, 26
+; GCN-NEXT: v_readlane_b32 s10, v31, 27
+; GCN-NEXT: v_readlane_b32 s11, v31, 28
+; GCN-NEXT: v_readlane_b32 s12, v31, 29
+; GCN-NEXT: v_readlane_b32 s13, v31, 30
+; GCN-NEXT: v_readlane_b32 s14, v31, 31
+; GCN-NEXT: v_readlane_b32 s15, v31, 32
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[0:15]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v31, 33
+; GCN-NEXT: v_readlane_b32 s1, v31, 34
+; GCN-NEXT: v_readlane_b32 s2, v31, 35
+; GCN-NEXT: v_readlane_b32 s3, v31, 36
+; GCN-NEXT: v_readlane_b32 s4, v31, 37
+; GCN-NEXT: v_readlane_b32 s5, v31, 38
+; GCN-NEXT: v_readlane_b32 s6, v31, 39
+; GCN-NEXT: v_readlane_b32 s7, v31, 40
+; GCN-NEXT: v_readlane_b32 s8, v31, 41
+; GCN-NEXT: v_readlane_b32 s9, v31, 42
+; GCN-NEXT: v_readlane_b32 s10, v31, 43
+; GCN-NEXT: v_readlane_b32 s11, v31, 44
+; GCN-NEXT: v_readlane_b32 s12, v31, 45
+; GCN-NEXT: v_readlane_b32 s13, v31, 46
+; GCN-NEXT: v_readlane_b32 s14, v31, 47
+; GCN-NEXT: v_readlane_b32 s15, v31, 48
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s4, v31, 48
-; GCN-NEXT: v_readlane_b32 s5, v31, 49
-; GCN-NEXT: v_readlane_b32 s6, v31, 50
-; GCN-NEXT: v_readlane_b32 s7, v31, 51
-; GCN-NEXT: v_readlane_b32 s8, v31, 52
-; GCN-NEXT: v_readlane_b32 s9, v31, 53
-; GCN-NEXT: v_readlane_b32 s10, v31, 54
-; GCN-NEXT: v_readlane_b32 s11, v31, 55
-; GCN-NEXT: v_readlane_b32 s12, v31, 56
-; GCN-NEXT: v_readlane_b32 s13, v31, 57
-; GCN-NEXT: v_readlane_b32 s14, v31, 58
-; GCN-NEXT: v_readlane_b32 s15, v31, 59
-; GCN-NEXT: v_readlane_b32 s16, v31, 60
-; GCN-NEXT: v_readlane_b32 s17, v31, 61
-; GCN-NEXT: v_readlane_b32 s18, v31, 62
-; GCN-NEXT: v_readlane_b32 s19, v31, 63
; GCN-NEXT: s_mov_b64 s[0:1], exec
-; GCN-NEXT: s_mov_b64 exec, 3
-; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, 0xffff
+; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readlane_b32 s0, v0, 0
; GCN-NEXT: v_readlane_b32 s1, v0, 1
+; GCN-NEXT: v_readlane_b32 s2, v0, 2
+; GCN-NEXT: v_readlane_b32 s3, v0, 3
+; GCN-NEXT: v_readlane_b32 s4, v0, 4
+; GCN-NEXT: v_readlane_b32 s5, v0, 5
+; GCN-NEXT: v_readlane_b32 s6, v0, 6
+; GCN-NEXT: v_readlane_b32 s7, v0, 7
+; GCN-NEXT: v_readlane_b32 s8, v0, 8
+; GCN-NEXT: v_readlane_b32 s9, v0, 9
+; GCN-NEXT: v_readlane_b32 s10, v0, 10
+; GCN-NEXT: v_readlane_b32 s11, v0, 11
+; GCN-NEXT: v_readlane_b32 s12, v0, 12
+; GCN-NEXT: v_readlane_b32 s13, v0, 13
+; GCN-NEXT: v_readlane_b32 s14, v0, 14
+; GCN-NEXT: v_readlane_b32 s15, v0, 15
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[36:51]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[4:19]
+; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v31, 49
+; GCN-NEXT: v_readlane_b32 s1, v31, 50
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:1]
; GCN-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
index 56a675e3ddad..73d837efa9f4 100644
--- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
@@ -11,8 +11,8 @@ define void @child_function() #0 {
; GCN: v_writelane_b32 v255, s30, 0
; GCN: v_writelane_b32 v255, s31, 1
; GCN: s_swappc_b64 s[30:31], s[4:5]
-; GCN: v_readlane_b32 s30, v255, 0
-; GCN: v_readlane_b32 s31, v255, 1
+; GCN: v_readlane_b32 s4, v255, 0
+; GCN: v_readlane_b32 s5, v255, 1
; GCN: v_readlane_b32 s33, v255, 2
; GCN: ; NumVgprs: 256
@@ -57,8 +57,8 @@ define void @reserve_vgpr_with_no_lower_vgpr_available() #0 {
; GCN: v_writelane_b32 v254, s30, 0
; GCN: v_writelane_b32 v254, s31, 1
; GCN: s_swappc_b64 s[30:31], s[4:5]
-; GCN: v_readlane_b32 s30, v254, 0
-; GCN: v_readlane_b32 s31, v254, 1
+; GCN: v_readlane_b32 s4, v254, 0
+; GCN: v_readlane_b32 s5, v254, 1
; GCN: v_readlane_b32 s33, v254, 2
define void @reserve_lowest_available_vgpr() #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
index c817b977eb9d..2138af8099f9 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
@@ -13,25 +13,25 @@ body: |
; SPILLED: bb.0:
; SPILLED: successors: %bb.1(0x80000000)
; SPILLED: S_NOP 0, implicit-def renamable $agpr0
- ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
- ; SPILLED: S_NOP 0, implicit-def renamable $agpr0
+ ; SPILLED: S_NOP 0, implicit-def renamable $agpr1
; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; SPILLED: SI_SPILL_A32_SAVE killed $agpr1, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; SPILLED: bb.1:
; SPILLED: successors: %bb.2(0x80000000)
; SPILLED: S_NOP 1
; SPILLED: bb.2:
- ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5)
- ; SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1
+ ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5)
+ ; SPILLED: S_NOP 0, implicit renamable $agpr0, implicit renamable $agpr1
; EXPANDED-LABEL: name: spill_restore_agpr32
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
; EXPANDED: liveins: $vgpr0, $vgpr1
; EXPANDED: S_NOP 0, implicit-def renamable $agpr0
+ ; EXPANDED: S_NOP 0, implicit-def renamable $agpr1
; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
- ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0
- ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
+ ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec
; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED: bb.1:
; EXPANDED: successors: %bb.2(0x80000000)
@@ -41,7 +41,7 @@ body: |
; EXPANDED: liveins: $vgpr0, $vgpr1
; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec
; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec
- ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1
+ ; EXPANDED: S_NOP 0, implicit renamable $agpr0, implicit renamable $agpr1
bb.0:
S_NOP 0, implicit-def %0:agpr_32
S_NOP 0, implicit-def %1:agpr_32
@@ -72,7 +72,7 @@ body: |
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1
+ ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1
; EXPANDED-LABEL: name: spill_restore_agpr64
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -89,7 +89,7 @@ body: |
; EXPANDED: liveins: $vgpr0, $vgpr1
; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1
- ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1
+ ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1
bb.0:
S_NOP 0, implicit-def %0:areg_64
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -118,7 +118,6 @@ body: |
; SPILLED: bb.1:
; SPILLED: successors: %bb.2(0x80000000)
; SPILLED: bb.2:
- ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; SPILLED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; SPILLED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; SPILLED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
@@ -135,7 +134,8 @@ body: |
; SPILLED: S_NOP 0, implicit undef $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
; SPILLED: S_NOP 0, implicit undef $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247
; SPILLED: S_NOP 0, implicit undef $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255
- ; SPILLED: S_NOP 0, implicit killed renamable $agpr0
+ ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; SPILLED: S_NOP 0, implicit renamable $agpr0
; EXPANDED-LABEL: name: spill_restore_agpr32_used_all_vgprs
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -149,8 +149,6 @@ body: |
; EXPANDED: bb.1:
; EXPANDED: successors: %bb.2(0x80000000)
; EXPANDED: bb.2:
- ; EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
- ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
; EXPANDED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; EXPANDED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; EXPANDED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
@@ -167,7 +165,9 @@ body: |
; EXPANDED: S_NOP 0, implicit undef $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
; EXPANDED: S_NOP 0, implicit undef $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247
; EXPANDED: S_NOP 0, implicit undef $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255
- ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0
+ ; EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+ ; EXPANDED: S_NOP 0, implicit renamable $agpr0
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255
@@ -214,7 +214,7 @@ body: |
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2
+ ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2
; EXPANDED-LABEL: name: spill_restore_agpr96
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -233,7 +233,7 @@ body: |
; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2
; EXPANDED: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2
- ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2
+ ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2
bb.0:
S_NOP 0, implicit-def %0:areg_96
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -263,7 +263,7 @@ body: |
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3
+ ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3
; EXPANDED-LABEL: name: spill_restore_agpr128
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -284,7 +284,7 @@ body: |
; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
; EXPANDED: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
; EXPANDED: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
- ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3
+ ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3
bb.0:
S_NOP 0, implicit-def %0:areg_128
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -314,7 +314,7 @@ body: |
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4
+ ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4
; EXPANDED-LABEL: name: spill_restore_agpr160
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -337,7 +337,7 @@ body: |
; EXPANDED: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
; EXPANDED: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
; EXPANDED: $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4
+ ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4
bb.0:
S_NOP 0, implicit-def %0:areg_160
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -367,7 +367,7 @@ body: |
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
+ ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
; EXPANDED-LABEL: name: spill_restore_agpr192
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -392,7 +392,7 @@ body: |
; EXPANDED: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
; EXPANDED: $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
; EXPANDED: $agpr5 = V_ACCVGPR_WRITE_B32 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
+ ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
bb.0:
S_NOP 0, implicit-def %0:areg_192
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -422,7 +422,7 @@ body: |
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+ ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; EXPANDED-LABEL: name: spill_restore_agpr256
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -451,7 +451,7 @@ body: |
; EXPANDED: $agpr5 = V_ACCVGPR_WRITE_B32 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; EXPANDED: $agpr6 = V_ACCVGPR_WRITE_B32 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; EXPANDED: $agpr7 = V_ACCVGPR_WRITE_B32 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+ ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
bb.0:
S_NOP 0, implicit-def %0:areg_256
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -481,7 +481,7 @@ body: |
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+ ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; EXPANDED-LABEL: name: spill_restore_agpr512
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -526,7 +526,7 @@ body: |
; EXPANDED: $agpr13 = V_ACCVGPR_WRITE_B32 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; EXPANDED: $agpr14 = V_ACCVGPR_WRITE_B32 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; EXPANDED: $agpr15 = V_ACCVGPR_WRITE_B32 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+ ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
bb.0:
S_NOP 0, implicit-def %0:areg_512
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -556,7 +556,7 @@ body: |
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 128 from %stack.0, align 4, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+ ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; EXPANDED-LABEL: name: spill_restore_agpr1024
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -633,7 +633,7 @@ body: |
; EXPANDED: $agpr29 = V_ACCVGPR_WRITE_B32 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; EXPANDED: $agpr30 = V_ACCVGPR_WRITE_B32 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; EXPANDED: $agpr31 = V_ACCVGPR_WRITE_B32 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+ ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
bb.0:
S_NOP 0, implicit-def %0:areg_1024
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index 474461d2ae12..9b629a5f9111 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -1,32 +1,28 @@
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVGPR -check-prefix=GCN %s
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVGPR -check-prefix=GCN %s
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVMEM -check-prefix=GCN %s
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVMEM -check-prefix=GCN %s
+; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
+; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
+; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
+; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
; XXX - Why does it like to use vcc?
; GCN-LABEL: {{^}}spill_m0:
-; GCN: #ASMSTART
-; GCN-NEXT: s_mov_b32 m0, 0
-; GCN-NEXT: #ASMEND
-; GCN-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
+; GCN-DAG: s_cmp_lg_u32
-; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], [[M0_LANE:[0-9]+]]
+; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
+; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 2
-; TOVMEM: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0
-; TOVMEM: s_mov_b32 [[COPY_EXEC_LO:s[0-9]+]], exec_lo
-; TOVMEM: s_mov_b32 exec_lo, 1
-; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; 4-byte Folded Spill
-; TOVMEM: s_mov_b32 exec_lo, [[COPY_EXEC_LO]]
+; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
+; TOVMEM-DAG: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0
+; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 ; 4-byte Folded Spill
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: [[ENDIF]]:
-; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], [[M0_LANE]]
+; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 2
; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]]
-; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; 4-byte Folded Reload
+; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 ; 4-byte Folded Reload
; TOVMEM: s_waitcnt vmcnt(0)
; TOVMEM: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]], 0
; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
@@ -52,6 +48,8 @@ endif:
; m0 is killed, so it isn't necessary during the entry block spill to preserve it
; GCN-LABEL: {{^}}spill_kill_m0_lds:
+; GCN: s_mov_b32 m0, s6
+; GCN: v_interp_mov_f32
; GCN-NOT: v_readlane_b32 m0
; GCN-NOT: s_buffer_store_dword m0
@@ -81,11 +79,10 @@ endif: ; preds = %else, %if
; Force save and restore of m0 during SMEM spill
; GCN-LABEL: {{^}}m0_unavailable_spill:
-; GCN: s_load_dword [[REG0:s[0-9]+]], s[0:1], {{0x[0-9]+}}
; GCN: ; def m0, 1
-; GCN: s_mov_b32 m0, [[REG0]]
+; GCN: s_mov_b32 m0, s0
; GCN: v_interp_mov_f32
; GCN: ; clobber m0
@@ -127,17 +124,16 @@ endif:
}
; GCN-LABEL: {{^}}restore_m0_lds:
+; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]]
+; TOSMEM: s_cmp_eq_u32
; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it
; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}}
-; TOSMEM: s_buffer_store_dword s1, s[88:91], m0 ; 4-byte Folded Spill
+; TOSMEM: s_add_u32 m0, s3, 0x100
+; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill
; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]]
-; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}}
-; TOSMEM: s_waitcnt lgkmcnt(0)
+; TOSMEM: s_add_u32 m0, s3, 0x200
; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill
; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_cmp_eq_u32
; TOSMEM: s_cbranch_scc1
; TOSMEM: s_mov_b32 m0, -1
@@ -154,13 +150,6 @@ endif:
; TOSMEM: s_add_u32 m0, s3, 0x100
; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload
; FIXME-TOSMEM-NOT: m0
-
-; TOSMEM: s_mov_b32 [[REG1:s[0-9]+]], m0
-; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload
-; TOSMEM: s_mov_b32 m0, [[REG1]]
-; TOSMEM: s_mov_b32 m0, -1
-
; TOSMEM: s_waitcnt lgkmcnt(0)
; TOSMEM-NOT: m0
; TOSMEM: s_mov_b32 m0, s2
diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir
index 9344b0cb8c8a..25a2d6ebb006 100644
--- a/llvm/test/CodeGen/AMDGPU/spill192.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -24,7 +24,7 @@ body: |
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 24 from %stack.0, align 4, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+ ; SPILLED: S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-LABEL: name: spill_restore_sgpr192
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -49,7 +49,7 @@ body: |
; EXPANDED: $sgpr7 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 3
; EXPANDED: $sgpr8 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 4
; EXPANDED: $sgpr9 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 5
- ; EXPANDED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+ ; EXPANDED: S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
bb.0:
S_NOP 0, implicit-def %0:sgpr_192
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -79,7 +79,7 @@ body: |
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
- ; SPILLED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ ; SPILLED: S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; EXPANDED-LABEL: name: spill_restore_vgpr192
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
@@ -91,7 +91,7 @@ body: |
; EXPANDED: S_NOP 1
; EXPANDED: bb.2:
; EXPANDED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
- ; EXPANDED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ ; EXPANDED: S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
bb.0:
S_NOP 0, implicit-def %0:vreg_192
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir b/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir
deleted file mode 100644
index 9f5b4793ecfb..000000000000
--- a/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir
+++ /dev/null
@@ -1,32 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck %s
-
----
-name: bar
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
- scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
- stackPtrOffsetReg: '$sgpr32'
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; CHECK-LABEL: name: bar
- ; CHECK: liveins: $vgpr0
- ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
- ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc
- ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)
- ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc
- ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5)
- ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec
- ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc
- %0:vgpr_32 = COPY $vgpr0
- V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec
- %3:sreg_64_xexec = COPY $vcc
- %1:sreg_64_xexec = COPY $vcc
- %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec
- $vgpr0 = COPY %2
- S_ENDPGM 0, implicit $vgpr0, implicit %3
-
-...
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
index 0193313ff213..1a48e76a241b 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -69,8 +69,8 @@ if:
merge:
%merge_value = phi i32 [ 0, %entry ], [%tmp137, %if ]
; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]]
-; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[FIRST_IMM_OFFSET]]
; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET]]
+; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[FIRST_IMM_OFFSET]]
; GFX9-O0: v_cmp_eq_u32_e64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v[[FIRST]], v[[SECOND]]
%tmp138 = icmp eq i32 %tmp122, %merge_value
%tmp139 = sext i1 %tmp138 to i32
@@ -82,7 +82,7 @@ merge:
}
; GFX9-LABEL: {{^}}called:
-define hidden i32 @called(i32 %a) noinline {
+define i32 @called(i32 %a) noinline {
; GFX9: v_add_u32_e32 v1, v0, v0
%add = add i32 %a, %a
; GFX9: v_mul_lo_u32 v0, v1, v0
@@ -94,15 +94,10 @@ define hidden i32 @called(i32 %a) noinline {
; GFX9-LABEL: {{^}}call:
define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) {
-; GFX9-DAG: s_load_dword [[ARG:s[0-9]+]]
-; GFX9-O0-DAG: s_mov_b32 s0, 0{{$}}
-; GFX9-O0-DAG: v_mov_b32_e32 v0, [[ARG]]
-
-; GFX9-O3: v_mov_b32_e32 v2, [[ARG]]
-
-
+; GFX9-O0: v_mov_b32_e32 v0, s0
+; GFX9-O3: v_mov_b32_e32 v2, s0
; GFX9-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1
; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_not_b64 exec, exec
%tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
@@ -112,11 +107,12 @@ define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) {
%tmp134 = call i32 @called(i32 %tmp107)
; GFX9-O0: buffer_load_dword v1
; GFX9-O3: v_mov_b32_e32 v1, v0
-; GFX9-O0: v_add_u32_e32 v1, v0, v1
+; GFX9-O0: v_add_u32_e32 v0, v0, v1
; GFX9-O3: v_add_u32_e32 v1, v1, v2
%tmp136 = add i32 %tmp134, %tmp107
%tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136)
-; GFX9: buffer_store_dword v0
+; GFX9-O0: buffer_store_dword v2
+; GFX9-O3: buffer_store_dword v0
call void @llvm.amdgcn.raw.buffer.store.i32(i32 %tmp137, <4 x i32> %tmp14, i32 4, i32 0, i32 0)
ret void
}
@@ -131,24 +127,19 @@ define i64 @called_i64(i64 %a) noinline {
; GFX9-LABEL: {{^}}call_i64:
define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) {
-; GFX9: s_load_dwordx2 s{{\[}}[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]{{\]}}
-
-; GFX9-O0: s_mov_b64 s{{\[}}[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]{{\]}}, 0{{$}}
-; GFX9-O0: v_mov_b32_e32 v1, s[[ARG_LO]]
-; GFX9-O0: v_mov_b32_e32 v2, s[[ARG_HI]]
-
-; GFX9-O3-DAG: v_mov_b32_e32 v7, s[[ARG_HI]]
-; GFX9-O3-DAG: v_mov_b32_e32 v6, s[[ARG_LO]]
-
-; GFX9: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s[[ZERO_LO]]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s[[ZERO_HI]]
+; GFX9-O0: v_mov_b32_e32 v0, s0
+; GFX9-O0: v_mov_b32_e32 v1, s1
+; GFX9-O3: v_mov_b32_e32 v7, s1
+; GFX9-O3: v_mov_b32_e32 v6, s0
+; GFX9-NEXT: s_not_b64 exec, exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s3
; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0
; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_not_b64 exec, exec
%tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0)
+; GFX9-O0: buffer_store_dword v0
; GFX9-O0: buffer_store_dword v1
-; GFX9-O0: buffer_store_dword v2
; GFX9: s_swappc_b64
%tmp134 = call i64 @called_i64(i64 %tmp107)
; GFX9-O0: buffer_load_dword v4
diff --git a/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 662a78c4dfa6..b15145d85f17 100644
--- a/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -8,9 +8,6 @@
define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) #0 !dbg !4 {
entry:
%"alloca point" = bitcast i32 0 to i32
- br label %realentry
-
-realentry:
call void @llvm.dbg.value(metadata i32 %i, metadata !21, metadata !DIExpression()), !dbg !22
call void @llvm.dbg.value(metadata %struct.SVal* %location, metadata !23, metadata !DIExpression()), !dbg !22
%tmp = icmp ne i32 %i, 0, !dbg !25
diff --git a/llvm/test/CodeGen/ARM/Windows/alloca.ll b/llvm/test/CodeGen/ARM/Windows/alloca.ll
index ec3b130b3d8b..7db854df7296 100644
--- a/llvm/test/CodeGen/ARM/Windows/alloca.ll
+++ b/llvm/test/CodeGen/ARM/Windows/alloca.ll
@@ -17,11 +17,10 @@ entry:
; CHECK: bl num_entries
; Any register is actually valid here, but turns out we use lr,
; because we do not have the kill flag on R0.
-; CHECK: mov [[R0:r[0-9]+]], r0
-; CHECK: movs [[R1:r[0-9]+]], #7
-; CHECK: add.w [[R2:r[0-9]+]], [[R1]], [[R0]], lsl #2
-; CHECK: bic [[R2]], [[R2]], #4
-; CHECK: lsrs r4, [[R2]], #2
+; CHECK: movs [[R1:r1]], #7
+; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
+; CHECK: bic [[R0]], [[R0]], #4
+; CHECK: lsrs r4, [[R0]], #2
; CHECK: bl __chkstk
; CHECK: sub.w sp, sp, r4
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll b/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll
index 29336c2f7ffd..9e9a93e19b6a 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll
@@ -7,10 +7,12 @@
; CHECK_LABEL: main:
; CHECK: ldr [[R2:r[0-9]+]], {{\[}}[[R1:r[0-9]+]]{{\]}}
; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4]
-; CHECK: mov [[R4:r[0-9]+]], [[R1]]
-; CHECK: ldr [[R5:r[0-9]+]], {{\[}}[[R1]]{{\]}}
-; CHECK-NEXT: ldr [[R6:r[0-9]+]], {{\[}}[[R1]], #4]
-; CHECK: mov [[R7:r[0-9]+]], [[R6]]
+; CHECK: mov [[R4:r[0-9]+]], [[R2]]
+; CHECK-NEXT: mov [[R5:r[0-9]+]], [[R1]]
+; CHECK: ldr [[R2]], {{\[}}[[R1]]{{\]}}
+; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4]
+; CHECK: mov [[R6:r[0-9]+]], [[R2]]
+; CHECK-NEXT: mov [[R7:r[0-9]+]], [[R1]]
define arm_aapcs_vfpcc i32 @main() #0 {
entry:
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-O0.ll b/llvm/test/CodeGen/ARM/cmpxchg-O0.ll
index 1bc15dce2081..d3696cfe39a8 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-O0.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-O0.ll
@@ -7,21 +7,19 @@
define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_8:
-; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0
-; CHECK-DAG: mov [[NEW:r[0-9]+]], r2
; CHECK: dmb ish
; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]]
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldrexb [[OLD:[lr0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldrexb [[OLD:[lr0-9]+]], [r0]
; CHECK: cmp [[OLD]], [[DESIRED]]
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: strexb [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: strexb [[STATUS:r[0-9]+]], r2, [r0]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
; Materialisation of a boolean is done with sub/clz/lsr
; CHECK: uxtb [[CMP1:r[0-9]+]], [[DESIRED]]
-; CHECK: sub{{(\.w|s)?}} [[CMP1]], [[OLD]], [[CMP1]]
+; CHECK: sub{{(\.w)?}} [[CMP1]], [[OLD]], [[CMP1]]
; CHECK: clz [[CMP2:r[0-9]+]], [[CMP1]]
; CHECK: lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5
; CHECK: dmb ish
@@ -31,21 +29,19 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_16:
-; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0
-; CHECK-DAG: mov [[NEW:r[0-9]+]], r2
; CHECK: dmb ish
; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]]
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldrexh [[OLD:[lr0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldrexh [[OLD:[lr0-9]+]], [r0]
; CHECK: cmp [[OLD]], [[DESIRED]]
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: strexh [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: strexh [[STATUS:r[0-9]+]], r2, [r0]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
; Materialisation of a boolean is done with sub/clz/lsr
; CHECK: uxth [[CMP1:r[0-9]+]], [[DESIRED]]
-; CHECK: sub{{(\.w|s)?}} [[CMP1]], [[OLD]], [[CMP1]]
+; CHECK: sub{{(\.w)?}} [[CMP1]], [[OLD]], [[CMP1]]
; CHECK: clz [[CMP2:r[0-9]+]], [[CMP1]]
; CHECK: lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5
; CHECK: dmb ish
@@ -55,15 +51,13 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_32:
-; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0
-; CHECK-DAG: mov [[NEW:r[0-9]+]], r2
; CHECK: dmb ish
; CHECK-NOT: uxt
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldrex [[OLD:r[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldrex [[OLD:r[0-9]+]], [r0]
; CHECK: cmp [[OLD]], [[DESIRED]]
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: strex [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
@@ -78,15 +72,14 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind
define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_64:
-; CHECK: mov [[ADDR:r[0-9]+]], r0
; CHECK: dmb ish
; CHECK-NOT: uxt
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0]
; CHECK: cmp [[OLDLO]], r6
; CHECK: cmpeq [[OLDHI]], r7
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: strexd [[STATUS:[lr0-9]+]], r8, r9, [r1]
+; CHECK: strexd [[STATUS:[lr0-9]+]], r4, r5, [r0]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
@@ -97,15 +90,14 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind
define { i64, i1 } @test_nontrivial_args(i64* %addr, i64 %desired, i64 %new) {
; CHECK-LABEL: test_nontrivial_args:
-; CHECK: mov [[ADDR:r[0-9]+]], r0
; CHECK: dmb ish
; CHECK-NOT: uxt
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0]
; CHECK: cmp [[OLDLO]], {{r[0-9]+}}
; CHECK: cmpeq [[OLDHI]], {{r[0-9]+}}
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, {{\[}}[[ADDR]]{{\]}}
+; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r0]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
diff --git a/llvm/test/CodeGen/ARM/crash-greedy-v6.ll b/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
index a0241d95a7c6..d3c5057e3821 100644
--- a/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
+++ b/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
@@ -14,21 +14,21 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body, %for.body.lr.ph
; SOURCE-SCHED: ldr
; SOURCE-SCHED: ldr
-; SOURCE-SCHED: ldr
-; SOURCE-SCHED: ldr
-; SOURCE-SCHED: ldr
-; SOURCE-SCHED: add
; SOURCE-SCHED: add
+; SOURCE-SCHED: ldr
; SOURCE-SCHED: add
+; SOURCE-SCHED: ldr
; SOURCE-SCHED: add
; SOURCE-SCHED: ldr
+; SOURCE-SCHED: add
; SOURCE-SCHED: str
; SOURCE-SCHED: str
; SOURCE-SCHED: str
; SOURCE-SCHED: str
-; SOURCE-SCHED: bl
; SOURCE-SCHED: ldr
+; SOURCE-SCHED: bl
; SOURCE-SCHED: add
+; SOURCE-SCHED: ldr
; SOURCE-SCHED: cmp
; SOURCE-SCHED: bne
%i.031 = phi i32 [ 0, %for.body.lr.ph ], [ %0, %for.body ]
diff --git a/llvm/test/CodeGen/ARM/debug-info-blocks.ll b/llvm/test/CodeGen/ARM/debug-info-blocks.ll
index 1c9ffb1775aa..8b31e7a51d51 100644
--- a/llvm/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/llvm/test/CodeGen/ARM/debug-info-blocks.ll
@@ -6,7 +6,8 @@
; CHECK: DW_TAG_variable
; CHECK-NOT: DW_TAG
; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset]
-; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18)
+; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18
+; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18
; CHECK-NEXT: DW_AT_name {{.*}} "mydata"
; Radar 9331779
diff --git a/llvm/test/CodeGen/ARM/fast-isel-call.ll b/llvm/test/CodeGen/ARM/fast-isel-call.ll
index 293c268c5359..9c313c727aee 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-call.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-call.ll
@@ -41,31 +41,38 @@ define void @foo(i8 %a, i16 %b) nounwind {
; ARM: foo
; THUMB: foo
;; Materialize i1 1
-; ARM: movw [[REG0:r[0-9]+]], #1
-; THUMB: movs [[REG0:r[0-9]+]], #1
+; ARM: movw r2, #1
;; zero-ext
-; ARM: and [[REG1:r[0-9]+]], [[REG0]], #1
-; THUMB: and [[REG1:r[0-9]+]], [[REG0]], #1
+; ARM: and r2, r2, #1
+; THUMB: and r2, r2, #1
%1 = call i32 @t0(i1 zeroext 1)
-; ARM: sxtb r0, {{r[0-9]+}}
-; THUMB: sxtb r0, {{r[0-9]+}}
+; ARM: sxtb r2, r1
+; ARM: mov r0, r2
+; THUMB: sxtb r2, r1
+; THUMB: mov r0, r2
%2 = call i32 @t1(i8 signext %a)
-; ARM: and r0, {{r[0-9]+}}, #255
-; THUMB: and r0, {{r[0-9]+}}, #255
+; ARM: and r2, r1, #255
+; ARM: mov r0, r2
+; THUMB: and r2, r1, #255
+; THUMB: mov r0, r2
%3 = call i32 @t2(i8 zeroext %a)
-; ARM: sxth r0, {{r[0-9]+}}
-; THUMB: sxth r0, {{r[0-9]+}}
+; ARM: sxth r2, r1
+; ARM: mov r0, r2
+; THUMB: sxth r2, r1
+; THUMB: mov r0, r2
%4 = call i32 @t3(i16 signext %b)
-; ARM: uxth r0, {{r[0-9]+}}
-; THUMB: uxth r0, {{r[0-9]+}}
+; ARM: uxth r2, r1
+; ARM: mov r0, r2
+; THUMB: uxth r2, r1
+; THUMB: mov r0, r2
%5 = call i32 @t4(i16 zeroext %b)
;; A few test to check materialization
;; Note: i1 1 was materialized with t1 call
-; ARM: movw {{r[0-9]+}}, #255
+; ARM: movw r1, #255
%6 = call i32 @t2(i8 zeroext 255)
-; ARM: movw {{r[0-9]+}}, #65535
-; THUMB: movw {{r[0-9]+}}, #65535
+; ARM: movw r1, #65535
+; THUMB: movw r1, #65535
%7 = call i32 @t4(i16 zeroext 65535)
ret void
}
@@ -105,9 +112,10 @@ entry:
; ARM: bl {{_?}}bar
; ARM-LONG-LABEL: @t10
-; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
-; ARM-LONG-MACHO: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}}
-; ARM-LONG-MACHO: ldr [[R:r[0-9]+]], {{\[}}[[R1]]]
+; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
+; ARM-LONG-MACHO: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
+; ARM-LONG-MACHO: str [[R]], [r7, [[SLOT:#[-0-9]+]]] @ 4-byte Spill
+; ARM-LONG-MACHO: ldr [[R:l?r[0-9]*]], [r7, [[SLOT]]] @ 4-byte Reload
; ARM-LONG-ELF: movw [[R:l?r[0-9]*]], :lower16:bar
; ARM-LONG-ELF: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
@@ -130,9 +138,11 @@ entry:
; THUMB-DAG: str.w [[R4]], [sp, #4]
; THUMB: bl {{_?}}bar
; THUMB-LONG-LABEL: @t10
-; THUMB-LONG: {{(movw)|(ldr.n)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
-; THUMB-LONG: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}}
-; THUMB-LONG: ldr{{(.w)?}} [[R:r[0-9]+]], {{\[}}[[R1]]{{\]}}
+; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
+; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
+; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}}
+; THUMB-LONG: str [[R]], [sp, [[SLOT:#[-0-9]+]]] @ 4-byte Spill
+; THUMB-LONG: ldr.w [[R:l?r[0-9]*]], [sp, [[SLOT]]] @ 4-byte Reload
; THUMB-LONG: blx [[R]]
%call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
ret i32 0
diff --git a/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll b/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
index bda4c6d47237..b308c4482d27 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -55,13 +55,16 @@ define void @t2() nounwind ssp {
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; ARM-MACHO: ldr [[REG1:r[0-9]+]], [r0]
+; ARM-MACHO: ldr r0, [r0]
-; ARM-ELF: movw [[REG1:r[0-9]+]], :lower16:temp
-; ARM-ELF: movt [[REG1]], :upper16:temp
+; ARM-ELF: movw r0, :lower16:temp
+; ARM-ELF: movt r0, :upper16:temp
-; ARM: add r0, [[REG1]], #4
-; ARM: add r1, [[REG1]], #16
+; ARM: add r1, r0, #4
+; ARM: add r0, r0, #16
+; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
+; ARM: mov r0, r1
+; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
; ARM: movw r2, #17
; ARM: bl {{_?}}memcpy
; ARM-LONG-LABEL: t2:
@@ -77,9 +80,12 @@ define void @t2() nounwind ssp {
; THUMB-LABEL: t2:
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; THUMB: ldr [[REG1:r[0-9]+]], [r0]
-; THUMB: adds r0, [[REG1]], #4
-; THUMB: adds r1, #16
+; THUMB: ldr r0, [r0]
+; THUMB: adds r1, r0, #4
+; THUMB: adds r0, #16
+; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
+; THUMB: mov r0, r1
+; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
; THUMB: movs r2, #17
; THUMB: bl {{_?}}memcpy
; THUMB-LONG-LABEL: t2:
@@ -98,14 +104,15 @@ define void @t3() nounwind ssp {
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
+; ARM-MACHO: ldr r0, [r0]
-; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
-; ARM-ELF: movt [[REG0]], :upper16:temp
+; ARM-ELF: movw r0, :lower16:temp
+; ARM-ELF: movt r0, :upper16:temp
-; ARM: add r0, [[REG0]], #4
-; ARM: add r1, [[REG0]], #16
+; ARM: add r1, r0, #4
+; ARM: add r0, r0, #16
+; ARM: mov r0, r1
; ARM: movw r2, #10
; ARM: bl {{_?}}memmove
; ARM-LONG-LABEL: t3:
@@ -121,9 +128,12 @@ define void @t3() nounwind ssp {
; THUMB-LABEL: t3:
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; THUMB: ldr [[REG1:r[0-9]+]], [r0]
-; THUMB: adds r0, [[REG1]], #4
-; THUMB: adds r1, #16
+; THUMB: ldr r0, [r0]
+; THUMB: adds r1, r0, #4
+; THUMB: adds r0, #16
+; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
+; THUMB: mov r0, r1
+; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
; THUMB: movs r2, #10
; THUMB: bl {{_?}}memmove
; THUMB-LONG-LABEL: t3:
@@ -140,28 +150,28 @@ define void @t4() nounwind ssp {
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
+; ARM-MACHO: ldr r0, [r0]
-; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
-; ARM-ELF: movt [[REG0]], :upper16:temp
+; ARM-ELF: movw r0, :lower16:temp
+; ARM-ELF: movt r0, :upper16:temp
-; ARM: ldr [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16]
-; ARM: str [[REG1]], {{\[}}[[REG0]], #4]
-; ARM: ldr [[REG2:r[0-9]+]], {{\[}}[[REG0]], #20]
-; ARM: str [[REG2]], {{\[}}[[REG0]], #8]
-; ARM: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #24]
-; ARM: strh [[REG3]], {{\[}}[[REG0]], #12]
+; ARM: ldr r1, [r0, #16]
+; ARM: str r1, [r0, #4]
+; ARM: ldr r1, [r0, #20]
+; ARM: str r1, [r0, #8]
+; ARM: ldrh r1, [r0, #24]
+; ARM: strh r1, [r0, #12]
; ARM: bx lr
; THUMB-LABEL: t4:
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; THUMB: ldr [[REG1:r[0-9]+]], [r0]
-; THUMB: ldr [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16]
-; THUMB: str [[REG2]], {{\[}}[[REG1]], #4]
-; THUMB: ldr [[REG3:r[0-9]+]], {{\[}}[[REG1]], #20]
-; THUMB: str [[REG3]], {{\[}}[[REG1]], #8]
-; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #24]
-; THUMB: strh [[REG4]], {{\[}}[[REG1]], #12]
+; THUMB: ldr r0, [r0]
+; THUMB: ldr r1, [r0, #16]
+; THUMB: str r1, [r0, #4]
+; THUMB: ldr r1, [r0, #20]
+; THUMB: str r1, [r0, #8]
+; THUMB: ldrh r1, [r0, #24]
+; THUMB: strh r1, [r0, #12]
; THUMB: bx lr
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
ret void
@@ -174,36 +184,36 @@ define void @t5() nounwind ssp {
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
-
-; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
-; ARM-ELF: movt [[REG0]], :upper16:temp
-
-; ARM: ldrh [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16]
-; ARM: strh [[REG1]], {{\[}}[[REG0]], #4]
-; ARM: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG0]], #18]
-; ARM: strh [[REG2]], {{\[}}[[REG0]], #6]
-; ARM: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #20]
-; ARM: strh [[REG3]], {{\[}}[[REG0]], #8]
-; ARM: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG0]], #22]
-; ARM: strh [[REG4]], {{\[}}[[REG0]], #10]
-; ARM: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG0]], #24]
-; ARM: strh [[REG5]], {{\[}}[[REG0]], #12]
+; ARM-MACHO: ldr r0, [r0]
+
+; ARM-ELF: movw r0, :lower16:temp
+; ARM-ELF: movt r0, :upper16:temp
+
+; ARM: ldrh r1, [r0, #16]
+; ARM: strh r1, [r0, #4]
+; ARM: ldrh r1, [r0, #18]
+; ARM: strh r1, [r0, #6]
+; ARM: ldrh r1, [r0, #20]
+; ARM: strh r1, [r0, #8]
+; ARM: ldrh r1, [r0, #22]
+; ARM: strh r1, [r0, #10]
+; ARM: ldrh r1, [r0, #24]
+; ARM: strh r1, [r0, #12]
; ARM: bx lr
; THUMB-LABEL: t5:
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; THUMB: ldr [[REG1:r[0-9]+]], [r0]
-; THUMB: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16]
-; THUMB: strh [[REG2]], {{\[}}[[REG1]], #4]
-; THUMB: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG1]], #18]
-; THUMB: strh [[REG3]], {{\[}}[[REG1]], #6]
-; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #20]
-; THUMB: strh [[REG4]], {{\[}}[[REG1]], #8]
-; THUMB: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG1]], #22]
-; THUMB: strh [[REG5]], {{\[}}[[REG1]], #10]
-; THUMB: ldrh [[REG6:r[0-9]+]], {{\[}}[[REG1]], #24]
-; THUMB: strh [[REG6]], {{\[}}[[REG1]], #12]
+; THUMB: ldr r0, [r0]
+; THUMB: ldrh r1, [r0, #16]
+; THUMB: strh r1, [r0, #4]
+; THUMB: ldrh r1, [r0, #18]
+; THUMB: strh r1, [r0, #6]
+; THUMB: ldrh r1, [r0, #20]
+; THUMB: strh r1, [r0, #8]
+; THUMB: ldrh r1, [r0, #22]
+; THUMB: strh r1, [r0, #10]
+; THUMB: ldrh r1, [r0, #24]
+; THUMB: strh r1, [r0, #12]
; THUMB: bx lr
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
ret void
@@ -214,56 +224,56 @@ define void @t6() nounwind ssp {
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
-
-; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
-; ARM-ELF: movt [[REG0]], :upper16:temp
-
-; ARM: ldrb [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16]
-; ARM: strb [[REG1]], {{\[}}[[REG0]], #4]
-; ARM: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #17]
-; ARM: strb [[REG2]], {{\[}}[[REG0]], #5]
-; ARM: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #18]
-; ARM: strb [[REG3]], {{\[}}[[REG0]], #6]
-; ARM: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #19]
-; ARM: strb [[REG4]], {{\[}}[[REG0]], #7]
-; ARM: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #20]
-; ARM: strb [[REG5]], {{\[}}[[REG0]], #8]
-; ARM: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #21]
-; ARM: strb [[REG6]], {{\[}}[[REG0]], #9]
-; ARM: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #22]
-; ARM: strb [[REG7]], {{\[}}[[REG0]], #10]
-; ARM: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #23]
-; ARM: strb [[REG8]], {{\[}}[[REG0]], #11]
-; ARM: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #24]
-; ARM: strb [[REG9]], {{\[}}[[REG0]], #12]
-; ARM: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #25]
-; ARM: strb [[REG10]], {{\[}}[[REG0]], #13]
+; ARM-MACHO: ldr r0, [r0]
+
+; ARM-ELF: movw r0, :lower16:temp
+; ARM-ELF: movt r0, :upper16:temp
+
+; ARM: ldrb r1, [r0, #16]
+; ARM: strb r1, [r0, #4]
+; ARM: ldrb r1, [r0, #17]
+; ARM: strb r1, [r0, #5]
+; ARM: ldrb r1, [r0, #18]
+; ARM: strb r1, [r0, #6]
+; ARM: ldrb r1, [r0, #19]
+; ARM: strb r1, [r0, #7]
+; ARM: ldrb r1, [r0, #20]
+; ARM: strb r1, [r0, #8]
+; ARM: ldrb r1, [r0, #21]
+; ARM: strb r1, [r0, #9]
+; ARM: ldrb r1, [r0, #22]
+; ARM: strb r1, [r0, #10]
+; ARM: ldrb r1, [r0, #23]
+; ARM: strb r1, [r0, #11]
+; ARM: ldrb r1, [r0, #24]
+; ARM: strb r1, [r0, #12]
+; ARM: ldrb r1, [r0, #25]
+; ARM: strb r1, [r0, #13]
; ARM: bx lr
; THUMB-LABEL: t6:
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; THUMB: ldr [[REG0:r[0-9]+]], [r0]
-; THUMB: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #16]
-; THUMB: strb [[REG2]], {{\[}}[[REG0]], #4]
-; THUMB: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #17]
-; THUMB: strb [[REG3]], {{\[}}[[REG0]], #5]
-; THUMB: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #18]
-; THUMB: strb [[REG4]], {{\[}}[[REG0]], #6]
-; THUMB: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #19]
-; THUMB: strb [[REG5]], {{\[}}[[REG0]], #7]
-; THUMB: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #20]
-; THUMB: strb [[REG6]], {{\[}}[[REG0]], #8]
-; THUMB: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #21]
-; THUMB: strb [[REG7]], {{\[}}[[REG0]], #9]
-; THUMB: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #22]
-; THUMB: strb [[REG8]], {{\[}}[[REG0]], #10]
-; THUMB: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #23]
-; THUMB: strb [[REG9]], {{\[}}[[REG0]], #11]
-; THUMB: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #24]
-; THUMB: strb [[REG10]], {{\[}}[[REG0]], #12]
-; THUMB: ldrb [[REG11:r[0-9]+]], {{\[}}[[REG0]], #25]
-; THUMB: strb [[REG11]], {{\[}}[[REG0]], #13]
+; THUMB: ldr r0, [r0]
+; THUMB: ldrb r1, [r0, #16]
+; THUMB: strb r1, [r0, #4]
+; THUMB: ldrb r1, [r0, #17]
+; THUMB: strb r1, [r0, #5]
+; THUMB: ldrb r1, [r0, #18]
+; THUMB: strb r1, [r0, #6]
+; THUMB: ldrb r1, [r0, #19]
+; THUMB: strb r1, [r0, #7]
+; THUMB: ldrb r1, [r0, #20]
+; THUMB: strb r1, [r0, #8]
+; THUMB: ldrb r1, [r0, #21]
+; THUMB: strb r1, [r0, #9]
+; THUMB: ldrb r1, [r0, #22]
+; THUMB: strb r1, [r0, #10]
+; THUMB: ldrb r1, [r0, #23]
+; THUMB: strb r1, [r0, #11]
+; THUMB: ldrb r1, [r0, #24]
+; THUMB: strb r1, [r0, #12]
+; THUMB: ldrb r1, [r0, #25]
+; THUMB: strb r1, [r0, #13]
; THUMB: bx lr
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
ret void
diff --git a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
index 95942c271c9c..f24100b36db9 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -2,7 +2,7 @@
define i32 @t1(i32* nocapture %ptr) nounwind readonly {
entry:
-; THUMB-LABEL: t1:
+; THUMB: t1
%add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1
%0 = load i32, i32* %add.ptr, align 4
; THUMB: ldr r{{[0-9]}}, [r0, #-4]
@@ -11,7 +11,7 @@ entry:
define i32 @t2(i32* nocapture %ptr) nounwind readonly {
entry:
-; THUMB-LABEL: t2:
+; THUMB: t2
%add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63
%0 = load i32, i32* %add.ptr, align 4
; THUMB: ldr r{{[0-9]}}, [r0, #-252]
@@ -20,7 +20,7 @@ entry:
define i32 @t3(i32* nocapture %ptr) nounwind readonly {
entry:
-; THUMB-LABEL: t3:
+; THUMB: t3
%add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64
%0 = load i32, i32* %add.ptr, align 4
; THUMB: ldr r{{[0-9]}}, [r0]
@@ -29,7 +29,7 @@ entry:
define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
entry:
-; THUMB-LABEL: t4:
+; THUMB: t4
%add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1
%0 = load i16, i16* %add.ptr, align 2
; THUMB: ldrh r{{[0-9]}}, [r0, #-2]
@@ -38,7 +38,7 @@ entry:
define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly {
entry:
-; THUMB-LABEL: t5:
+; THUMB: t5
%add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127
%0 = load i16, i16* %add.ptr, align 2
; THUMB: ldrh r{{[0-9]}}, [r0, #-254]
@@ -47,7 +47,7 @@ entry:
define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly {
entry:
-; THUMB-LABEL: t6:
+; THUMB: t6
%add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128
%0 = load i16, i16* %add.ptr, align 2
; THUMB: ldrh r{{[0-9]}}, [r0]
@@ -56,7 +56,7 @@ entry:
define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly {
entry:
-; THUMB-LABEL: t7:
+; THUMB: t7
%add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1
%0 = load i8, i8* %add.ptr, align 1
; THUMB: ldrb r{{[0-9]}}, [r0, #-1]
@@ -65,7 +65,7 @@ entry:
define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly {
entry:
-; THUMB-LABEL: t8:
+; THUMB: t8
%add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255
%0 = load i8, i8* %add.ptr, align 1
; THUMB: ldrb r{{[0-9]}}, [r0, #-255]
@@ -74,7 +74,7 @@ entry:
define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly {
entry:
-; THUMB-LABEL: t9:
+; THUMB: t9
%add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256
%0 = load i8, i8* %add.ptr, align 1
; THUMB: ldrb r{{[0-9]}}, [r0]
@@ -83,96 +83,81 @@ entry:
define void @t10(i32* nocapture %ptr) nounwind {
entry:
-; THUMB-LABEL: t10:
+; THUMB: t10
%add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1
store i32 0, i32* %add.ptr, align 4
-; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-4]
+; THUMB: str r{{[0-9]}}, [r0, #-4]
ret void
}
define void @t11(i32* nocapture %ptr) nounwind {
entry:
-; THUMB-LABEL: t11:
+; THUMB: t11
%add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63
store i32 0, i32* %add.ptr, align 4
-; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-252]
+; THUMB: str r{{[0-9]}}, [r0, #-252]
ret void
}
define void @t12(i32* nocapture %ptr) nounwind {
entry:
-; THUMB-LABEL: t12:
+; THUMB: t12
%add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64
store i32 0, i32* %add.ptr, align 4
-; THUMB: movw [[REG:r[0-9]+]], #65280
-; THUMB: movt [[REG]], #65535
-; THUMB: add [[REG]], r0
-; THUMB: str r{{[0-9]}}, {{\[}}[[REG]]]
+; THUMB: str r{{[0-9]}}, [r0]
ret void
}
define void @t13(i16* nocapture %ptr) nounwind {
entry:
-; THUMB-LABEL: t13:
+; THUMB: t13
%add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1
store i16 0, i16* %add.ptr, align 2
-; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-2]
+; THUMB: strh r{{[0-9]}}, [r0, #-2]
ret void
}
define void @t14(i16* nocapture %ptr) nounwind {
entry:
-; THUMB-LABEL: t14:
+; THUMB: t14
%add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127
store i16 0, i16* %add.ptr, align 2
-; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-254]
+; THUMB: strh r{{[0-9]}}, [r0, #-254]
ret void
}
define void @t15(i16* nocapture %ptr) nounwind {
entry:
-; THUMB-LABEL: t15:
+; THUMB: t15
%add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128
store i16 0, i16* %add.ptr, align 2
-; THUMB: movw [[REG:r[0-9]+]], #65280
-; THUMB: movt [[REG]], #65535
-; THUMB: add [[REG]], r0
-; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]]]
+; THUMB: strh r{{[0-9]}}, [r0]
ret void
}
define void @t16(i8* nocapture %ptr) nounwind {
entry:
-; THUMB-LABEL: t16:
+; THUMB: t16
%add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1
store i8 0, i8* %add.ptr, align 1
-; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-1]
+; THUMB: strb r{{[0-9]}}, [r0, #-1]
ret void
}
define void @t17(i8* nocapture %ptr) nounwind {
entry:
-; THUMB-LABEL: t17:
+; THUMB: t17
%add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255
store i8 0, i8* %add.ptr, align 1
-; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-255]
+; THUMB: strb r{{[0-9]}}, [r0, #-255]
ret void
}
define void @t18(i8* nocapture %ptr) nounwind {
entry:
-; THUMB-LABEL: t18:
+; THUMB: t18
%add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256
store i8 0, i8* %add.ptr, align 1
-; THUMB: movw [[REG:r[0-9]+]], #65280
-; THUMB: movt [[REG]], #65535
-; THUMB: add [[REG]], r0
-; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]]]
+; THUMB: strb r{{[0-9]}}, [r0]
ret void
}
diff --git a/llvm/test/CodeGen/ARM/fast-isel-select.ll b/llvm/test/CodeGen/ARM/fast-isel-select.ll
index 70987422dfde..0da63499f302 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-select.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-select.ll
@@ -21,12 +21,14 @@ entry:
define i32 @t2(i1 %c, i32 %a) nounwind readnone {
entry:
; ARM: t2
-; ARM: tst {{r[0-9]+}}, #1
-; ARM: moveq {{r[0-9]+}}, #20
+; ARM: tst r0, #1
+; ARM: moveq r{{[1-9]}}, #20
+; ARM: mov r0, r{{[1-9]}}
; THUMB-LABEL: t2
-; THUMB: tst.w {{r[0-9]+}}, #1
+; THUMB: tst.w r0, #1
; THUMB: it eq
-; THUMB: moveq {{r[0-9]+}}, #20
+; THUMB: moveq r{{[1-9]}}, #20
+; THUMB: mov r0, r{{[1-9]}}
%0 = select i1 %c, i32 %a, i32 20
ret i32 %0
}
@@ -41,7 +43,7 @@ entry:
; THUMB: tst.w r0, #1
; THUMB: it ne
; THUMB: movne r2, r1
-; THUMB: adds r0, r2, r1
+; THUMB: add.w r0, r2, r1
%0 = select i1 %c, i32 %a, i32 %b
%1 = add i32 %0, %a
ret i32 %1
@@ -65,12 +67,14 @@ entry:
define i32 @t5(i1 %c, i32 %a) nounwind readnone {
entry:
; ARM: t5
-; ARM: tst {{r[0-9]+}}, #1
-; ARM: mvneq {{r[0-9]+}}, #1
+; ARM: tst r0, #1
+; ARM: mvneq r{{[1-9]}}, #1
+; ARM: mov r0, r{{[1-9]}}
; THUMB: t5
-; THUMB: tst.w {{r[0-9]+}}, #1
+; THUMB: tst.w r0, #1
; THUMB: it eq
-; THUMB: mvneq {{r[0-9]+}}, #1
+; THUMB: mvneq r{{[1-9]}}, #1
+; THUMB: mov r0, r{{[1-9]}}
%0 = select i1 %c, i32 %a, i32 -2
ret i32 %0
}
@@ -79,12 +83,14 @@ entry:
define i32 @t6(i1 %c, i32 %a) nounwind readnone {
entry:
; ARM: t6
-; ARM: tst {{r[0-9]+}}, #1
-; ARM: mvneq {{r[0-9]+}}, #978944
+; ARM: tst r0, #1
+; ARM: mvneq r{{[1-9]}}, #978944
+; ARM: mov r0, r{{[1-9]}}
; THUMB: t6
-; THUMB: tst.w {{r[0-9]+}}, #1
+; THUMB: tst.w r0, #1
; THUMB: it eq
-; THUMB: mvneq {{r[0-9]+}}, #978944
+; THUMB: mvneq r{{[1-9]}}, #978944
+; THUMB: mov r0, r{{[1-9]}}
%0 = select i1 %c, i32 %a, i32 -978945
ret i32 %0
}
diff --git a/llvm/test/CodeGen/ARM/fast-isel-vararg.ll b/llvm/test/CodeGen/ARM/fast-isel-vararg.ll
index 3a9011ba622a..ffc3d9a05d88 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-vararg.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-vararg.ll
@@ -17,24 +17,26 @@ entry:
%4 = load i32, i32* %n, align 4
; ARM: VarArg
; ARM: mov [[FP:r[0-9]+]], sp
-; ARM: sub sp, sp, #32
+; ARM: sub sp, sp, #{{(36|40)}}
; ARM: ldr r1, {{\[}}[[FP]], #-4]
; ARM: ldr r2, {{\[}}[[FP]], #-8]
; ARM: ldr r3, {{\[}}[[FP]], #-12]
-; ARM: ldr [[Ra:r[0-9]+|lr]], [sp, #16]
-; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #12]
-; ARM: movw r0, #5
-; ARM: str [[Ra]], [sp]
+; ARM: ldr [[Ra:r[0-9]+]], {{\[}}[[FP]], #-16]
+; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #{{(16|20)}}]
+; ARM: movw [[Rc:[lr]+[0-9]*]], #5
+; Ra got spilled
+; ARM: mov r0, [[Rc]]
+; ARM: str {{.*}}, [sp]
; ARM: str [[Rb]], [sp, #4]
; ARM: bl {{_?CallVariadic}}
-; THUMB: sub sp, #32
-; THUMB: ldr r1, [sp, #28]
-; THUMB: ldr r2, [sp, #24]
-; THUMB: ldr r3, [sp, #20]
-; THUMB: ldr.w [[Ra:r[0-9]+]], [sp, #16]
-; THUMB: ldr.w [[Rb:r[0-9]+]], [sp, #12]
-; THUMB: str.w [[Ra]], [sp]
-; THUMB: str.w [[Rb]], [sp, #4]
+; THUMB: sub sp, #{{36}}
+; THUMB: ldr r1, [sp, #32]
+; THUMB: ldr r2, [sp, #28]
+; THUMB: ldr r3, [sp, #24]
+; THUMB: ldr {{[a-z0-9]+}}, [sp, #20]
+; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #16]
+; THUMB: str.w {{[a-z0-9]+}}, [sp]
+; THUMB: str.w {{[a-z0-9]+}}, [sp, #4]
; THUMB: bl {{_?}}CallVariadic
%call = call i32 (i32, ...) @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
store i32 %call, i32* %tmp, align 4
diff --git a/llvm/test/CodeGen/ARM/ldrd.ll b/llvm/test/CodeGen/ARM/ldrd.ll
index 2bba84141380..b4325c78dbf2 100644
--- a/llvm/test/CodeGen/ARM/ldrd.ll
+++ b/llvm/test/CodeGen/ARM/ldrd.ll
@@ -81,12 +81,11 @@ return: ; preds = %bb, %entry
; CHECK-LABEL: Func1:
define void @Func1() nounwind ssp "frame-pointer"="all" {
entry:
-; A8: movw [[BASER:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}}
-; A8: movt [[BASER]], :upper16:{{.*}}TestVar{{.*}}
-; A8: ldr [[BASE:r[0-9]+]], {{\[}}[[BASER]]]
+; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}}
+; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}}
; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], {{\[}}[[BASE]], #4]
-; A8-NEXT: add [[FIELD2]], [[FIELD1]]
-; A8-NEXT: str [[FIELD2]], {{\[}}[[BASE]]{{\]}}
+; A8-NEXT: add [[FIELD1]], [[FIELD2]]
+; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}}
; CONSERVATIVE-NOT: ldrd
%orig_blocks = alloca [256 x i16], align 2
%0 = bitcast [256 x i16]* %orig_blocks to i8*call void @llvm.lifetime.start.p0i8(i64 512, i8* %0) nounwind
diff --git a/llvm/test/CodeGen/ARM/legalize-bitcast.ll b/llvm/test/CodeGen/ARM/legalize-bitcast.ll
index 67ea37aa3503..529775df5fd7 100644
--- a/llvm/test/CodeGen/ARM/legalize-bitcast.ll
+++ b/llvm/test/CodeGen/ARM/legalize-bitcast.ll
@@ -14,17 +14,17 @@ define i32 @vec_to_int() {
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: @ implicit-def: $d17
; CHECK-NEXT: vmov.32 d17[0], r0
-; CHECK-NEXT: vrev32.16 d18, d17
+; CHECK-NEXT: vrev32.16 d17, d17
; CHECK-NEXT: vrev16.8 d16, d16
-; CHECK-NEXT: @ kill: def $d16 killed $d16 def $q8
-; CHECK-NEXT: vmov.f64 d17, d18
-; CHECK-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill
+; CHECK-NEXT: vmov.f64 d18, d16
+; CHECK-NEXT: vmov.f64 d19, d17
+; CHECK-NEXT: vstmia sp, {d18, d19} @ 16-byte Spill
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_1: @ %bb.1
; CHECK-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
-; CHECK-NEXT: vrev32.16 q8, q8
-; CHECK-NEXT: vmov.f64 d16, d17
-; CHECK-NEXT: vmov.32 r0, d16[0]
+; CHECK-NEXT: vrev32.16 q9, q8
+; CHECK-NEXT: @ kill: def $d19 killed $d19 killed $q9
+; CHECK-NEXT: vmov.32 r0, d19[0]
; CHECK-NEXT: add sp, sp, #28
; CHECK-NEXT: pop {r4}
; CHECK-NEXT: bx lr
@@ -41,15 +41,15 @@ bb.1:
define i16 @int_to_vec(i80 %in) {
; CHECK-LABEL: int_to_vec:
; CHECK: @ %bb.0:
-; CHECK-NEXT: @ kill: def $r2 killed $r1
-; CHECK-NEXT: @ kill: def $r2 killed $r0
+; CHECK-NEXT: mov r3, r1
+; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: lsl r0, r0, #16
; CHECK-NEXT: orr r0, r0, r1, lsr #16
-; CHECK-NEXT: @ implicit-def: $d18
-; CHECK-NEXT: vmov.32 d18[0], r0
-; CHECK-NEXT: @ implicit-def: $q8
-; CHECK-NEXT: vmov.f64 d16, d18
-; CHECK-NEXT: vrev32.16 q8, q8
+; CHECK-NEXT: @ implicit-def: $d16
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: @ implicit-def: $q9
+; CHECK-NEXT: vmov.f64 d18, d16
+; CHECK-NEXT: vrev32.16 q8, q9
; CHECK-NEXT: @ kill: def $d16 killed $d16 killed $q8
; CHECK-NEXT: vmov.u16 r0, d16[0]
; CHECK-NEXT: bx lr
diff --git a/llvm/test/CodeGen/ARM/pr47454.ll b/llvm/test/CodeGen/ARM/pr47454.ll
index 399de44ec731..d36a29c4e77c 100644
--- a/llvm/test/CodeGen/ARM/pr47454.ll
+++ b/llvm/test/CodeGen/ARM/pr47454.ll
@@ -16,23 +16,23 @@ define internal fastcc void @main() {
; CHECK-NEXT: ldrh r0, [r11, #-2]
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: vstr s0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vstr s0, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: bl getConstant
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vldr s0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: vmov r1, s0
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r1, r0
-; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: uxth r1, r1
-; CHECK-NEXT: vmov s0, r1
; CHECK-NEXT: uxth r0, r0
-; CHECK-NEXT: vmov s1, r0
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: uxth r1, r0
+; CHECK-NEXT: vmov s1, r1
; CHECK-NEXT: bl isEqual
; CHECK-NEXT: mov sp, r11
; CHECK-NEXT: pop {r11, pc}
diff --git a/llvm/test/CodeGen/ARM/stack-guard-reassign.ll b/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
index f2d9a5c0f7fd..02ee9c067f22 100644
--- a/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
+++ b/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
@@ -3,12 +3,11 @@
; Verify that the offset assigned to the stack protector is at the top of the
; frame, covering the locals.
; CHECK-LABEL: fn:
-; CHECK: sub sp, sp, #24
+; CHECK: sub sp, sp, #32
; CHECK-NEXT: sub sp, sp, #65536
; CHECK-NEXT: ldr r1, .LCPI0_0
-; CHECK-NEXT: str r1, [sp, #8]
-; CHECK-NEXT: ldr r1, [r1]
+; CHECK-NEXT: ldr r2, [r1]
; CHECK-NEXT: add lr, sp, #65536
-; CHECK-NEXT: str r1, [lr, #20]
+; CHECK-NEXT: str r2, [lr, #28]
; CHECK: .LCPI0_0:
; CHECK-NEXT: .long __stack_chk_guard
diff --git a/llvm/test/CodeGen/ARM/swifterror.ll b/llvm/test/CodeGen/ARM/swifterror.ll
index 7968230ccab2..d96bc0249b42 100644
--- a/llvm/test/CodeGen/ARM/swifterror.ll
+++ b/llvm/test/CodeGen/ARM/swifterror.ll
@@ -21,9 +21,9 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) {
; CHECK-O0: mov r{{.*}}, #16
; CHECK-O0: malloc
; CHECK-O0: mov [[ID2:r[0-9]+]], r0
-; CHECK-O0: mov r8, [[ID2]]
; CHECK-O0: mov [[ID:r[0-9]+]], #1
-; CHECK-O0: strb [[ID]], {{\[}}[[ID2]], #8]
+; CHECK-O0: strb [[ID]], [r0, #8]
+; CHECK-O0: mov r8, [[ID2]]
entry:
%call = call i8* @malloc(i64 16)
%call.0 = bitcast i8* %call to %swift_error*
@@ -49,16 +49,16 @@ define float @caller(i8* %error_ref) {
; CHECK-O0-LABEL: caller:
; spill r0
; CHECK-O0-DAG: mov r8, #0
-; CHECK-O0-DAG: str r0, [sp[[SLOT:(, #[0-9]+)?]]]
+; CHECK-O0-DAG: str r0, [sp, [[SLOT:#[0-9]+]]
; CHECK-O0: bl {{.*}}foo
; CHECK-O0: mov [[TMP:r[0-9]+]], r8
-; CHECK-O0: str [[TMP]], [sp[[SLOT2:(, #[0-9]+)?]]]
+; CHECK-O0: str [[TMP]], [sp]
; CHECK-O0: bne
-; CHECK-O0: ldr [[ID:r[0-9]+]], [sp[[SLOT]]]
; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8]
+; CHECK-O0: ldr [[ID:r[0-9]+]], [sp, [[SLOT]]]
; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
; reload r0
-; CHECK-O0: ldr r0, [sp[[SLOT2]]]
+; CHECK-O0: ldr r0, [sp]
; CHECK-O0: free
entry:
%error_ptr_ref = alloca swifterror %swift_error*
@@ -98,14 +98,14 @@ define float @caller2(i8* %error_ref) {
; CHECK-O0-DAG: mov r8, #0
; CHECK-O0: bl {{.*}}foo
; CHECK-O0: mov r{{.*}}, r8
-; CHECK-O0: str r0, [sp{{(, #[0-9]+)?}}]
+; CHECK-O0: str r0, [sp]
; CHECK-O0: bne
; CHECK-O0: ble
+; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8]
; reload r0
; CHECK-O0: ldr [[ID:r[0-9]+]],
-; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8]
; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-O0: ldr r0, [sp{{(, #[0-9]+)?}}]
+; CHECK-O0: ldr r0, [sp]
; CHECK-O0: free
entry:
%error_ptr_ref = alloca swifterror %swift_error*
@@ -143,15 +143,16 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8]
; CHECK-O0-LABEL: foo_if:
+; CHECK-O0: cmp r0, #0
; spill to stack
; CHECK-O0: str r8
-; CHECK-O0: cmp r0, #0
; CHECK-O0: beq
; CHECK-O0: mov r0, #16
; CHECK-O0: malloc
; CHECK-O0: mov [[ID:r[0-9]+]], r0
; CHECK-O0: mov [[ID2:[a-z0-9]+]], #1
-; CHECK-O0: strb [[ID2]], {{\[}}[[ID]], #8]
+; CHECK-O0: strb [[ID2]], [r0, #8]
+; CHECK-O0: mov r8, [[ID]]
; reload from stack
; CHECK-O0: ldr r8
entry:
@@ -232,18 +233,18 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi
; CHECK-APPLE-DAG: str r{{.*}}, [{{.*}}[[SRET]], #4]
; CHECK-O0-LABEL: foo_sret:
-; CHECK-O0-DAG: mov r{{.*}}, #16
+; CHECK-O0: mov r{{.*}}, #16
; spill to stack: sret and val1
; CHECK-O0-DAG: str r0
; CHECK-O0-DAG: str r1
; CHECK-O0: malloc
+; CHECK-O0: mov [[ID:r[0-9]+]], #1
+; CHECK-O0: strb [[ID]], [r0, #8]
; reload from stack: sret and val1
; CHECK-O0: ldr
; CHECK-O0: ldr
-; CHECK-O0-DAG: mov r8
-; CHECK-O0-DAG: mov [[ID:r[0-9]+]], #1
-; CHECK-O0-DAG: strb [[ID]], [{{r[0-9]+}}, #8]
-; CHECK-O0-DAG: str r{{.*}}, [{{.*}}, #4]
+; CHECK-O0: str r{{.*}}, [{{.*}}, #4]
+; CHECK-O0: mov r8
entry:
%call = call i8* @malloc(i64 16)
%call.0 = bitcast i8* %call to %swift_error*
@@ -270,15 +271,16 @@ define float @caller3(i8* %error_ref) {
; CHECK-O0-LABEL: caller3:
; CHECK-O0-DAG: mov r8, #0
+; CHECK-O0-DAG: mov r0
; CHECK-O0-DAG: mov r1
; CHECK-O0: bl {{.*}}foo_sret
; CHECK-O0: mov [[ID2:r[0-9]+]], r8
-; CHECK-O0: str [[ID2]], [sp[[SLOT:.*]]]
; CHECK-O0: cmp r8
+; CHECK-O0: str [[ID2]], [sp[[SLOT:.*]]]
; CHECK-O0: bne
; Access part of the error object and save it to error_ref
-; CHECK-O0: ldr [[ID:r[0-9]+]]
; CHECK-O0: ldrb [[CODE:r[0-9]+]]
+; CHECK-O0: ldr [[ID:r[0-9]+]]
; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
; CHECK-O0: ldr r0, [sp[[SLOT]]
; CHECK-O0: bl {{.*}}free
diff --git a/llvm/test/CodeGen/ARM/thumb-big-stack.ll b/llvm/test/CodeGen/ARM/thumb-big-stack.ll
index e5cbb9747a7e..954c12634cff 100644
--- a/llvm/test/CodeGen/ARM/thumb-big-stack.ll
+++ b/llvm/test/CodeGen/ARM/thumb-big-stack.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7s-apple-ios"
; CHECK: movw [[ADDR:(r[0-9]+|lr)]], #
; CHECK-NEXT: add [[ADDR]], sp
; CHECK-NEXT: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, {{\[}}[[ADDR]]:128]
-define <4 x float> @f(<4 x float> %x) {
+define <4 x float> @f(<4 x float> %x, float %val) {
entry:
%.compoundliteral7837 = alloca <4 x float>, align 16
%.compoundliteral7865 = alloca <4 x float>, align 16
@@ -143,9 +143,9 @@ entry:
%.compoundliteral13969 = alloca <4 x float>, align 16
%.compoundliteral13983 = alloca <4 x float>, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40746999A0000000, float 0xC0719B3340000000, float 0xC070B66660000000, float 0xC07404CCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40746999A0000000, float 0xC0719B3340000000, float 0xC070B66660000000, float 0xC07404CCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40701B3340000000, float 0x405B866660000000, float 0xC0763999A0000000, float 4.895000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40701B3340000000, float 0x405B866660000000, float 0xC0763999A0000000, float 4.895000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -153,17 +153,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add68 = fadd <4 x float> %tmp1, %tmp
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add68, <4 x float>* undef, align 16
+ store volatile <4 x float> %add68, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp2 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add76 = fadd float undef, 0x4074C999A0000000
+ %add76 = fadd float %val, 0x4074C999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp3 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins77 = insertelement <4 x float> %tmp3, float %add76, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins77, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins77, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp4 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -175,15 +175,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins80 = insertelement <4 x float> %tmp5, float %add79, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins80, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins80, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40678CCCC0000000, float 0xC03E4CCCC0000000, float -4.170000e+02, float -1.220000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40678CCCC0000000, float 0xC03E4CCCC0000000, float -4.170000e+02, float -1.220000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp6 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add82 = fadd <4 x float> undef, %tmp6
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add82, <4 x float>* undef, align 16
+ store volatile <4 x float> %add82, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp7 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -195,19 +195,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins85 = insertelement <4 x float> %tmp8, float %add84, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins85, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins85, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp9 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext86 = extractelement <4 x float> %tmp9, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add93 = fadd float undef, 0xC076C66660000000
+ %add93 = fadd float %val, 0xC076C66660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp10 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins94 = insertelement <4 x float> %tmp10, float %add93, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x406C2999A0000000, float 8.050000e+01, float 0xC0794999A0000000, float 0xC073E4CCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x406C2999A0000000, float 8.050000e+01, float 0xC0794999A0000000, float 0xC073E4CCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp11 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -223,17 +223,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp14 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins102 = insertelement <4 x float> undef, float undef, i32 1
+ %vecins102 = insertelement <4 x float> undef, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins102, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins102, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp15 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add104 = fadd float undef, 0x406AB999A0000000
+ %add104 = fadd float %val, 0x406AB999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp16 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC0531999A0000000, float 0xC0737999A0000000, float 0x407CB33340000000, float 0xC06DCCCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC0531999A0000000, float 0xC0737999A0000000, float 0x407CB33340000000, float 0xC06DCCCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext579 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -243,7 +243,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins581 = insertelement <4 x float> %tmp17, float %add580, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins581, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins581, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp18 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -251,7 +251,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add583 = fadd float %vecext582, 0x40444CCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp19 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -261,25 +261,25 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins592 = insertelement <4 x float> undef, float %add591, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins592, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins592, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp20 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add594 = fadd float undef, 0xC05B466660000000
+ %add594 = fadd float %val, 0xC05B466660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add605 = fadd float undef, 0x407164CCC0000000
+ %add605 = fadd float %val, 0x407164CCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp21 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add616 = fadd float undef, 1.885000e+02
+ %add616 = fadd float %val, 1.885000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp22 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp23 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins620 = insertelement <4 x float> undef, float undef, i32 1
+ %vecins620 = insertelement <4 x float> undef, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins620, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins620, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext621 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -287,7 +287,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins623 = insertelement <4 x float> undef, float %add622, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins623, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins623, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp24 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -299,9 +299,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins626 = insertelement <4 x float> %tmp25, float %add625, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins626, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins626, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x404D0CCCC0000000, float 3.955000e+02, float 0xC0334CCCC0000000, float 0x40754E6660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x404D0CCCC0000000, float 3.955000e+02, float 0xC0334CCCC0000000, float 0x40754E6660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp26 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -309,7 +309,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add628 = fadd <4 x float> %tmp27, %tmp26
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add628, <4 x float>* undef, align 16
+ store volatile <4 x float> %add628, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp28 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -321,7 +321,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins631 = insertelement <4 x float> %tmp29, float %add630, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins631, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins631, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp30 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -333,7 +333,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins634 = insertelement <4 x float> %tmp31, float %add633, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins634, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins634, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp32 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -347,13 +347,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp35 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add658 = fadd float undef, 0xC04A4CCCC0000000
+ %add658 = fadd float %val, 0xC04A4CCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext663 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp36 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins665 = insertelement <4 x float> %tmp36, float undef, i32 2
+ %vecins665 = insertelement <4 x float> %tmp36, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext694 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -363,31 +363,31 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins696 = insertelement <4 x float> %tmp37, float %add695, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins696, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins696, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC069FCCCC0000000, float 0xC07C6E6660000000, float 0x4067E33340000000, float 0x4078DB3340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC069FCCCC0000000, float 0xC07C6E6660000000, float 0x4067E33340000000, float 0x4078DB3340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp38 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext699 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add703 = fadd float undef, 0x4068F33340000000
+ %add703 = fadd float %val, 0x4068F33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins704 = insertelement <4 x float> undef, float %add703, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins704, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins704, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp39 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp40 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins710 = insertelement <4 x float> %tmp40, float undef, i32 3
+ %vecins710 = insertelement <4 x float> %tmp40, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins710, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins710, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC05D9999A0000000, float 0x405D6CCCC0000000, float 0x40765CCCC0000000, float 0xC07C64CCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC05D9999A0000000, float 0x405D6CCCC0000000, float 0x40765CCCC0000000, float 0xC07C64CCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp41 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -395,7 +395,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add712 = fadd <4 x float> %tmp42, %tmp41
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add712, <4 x float>* undef, align 16
+ store volatile <4 x float> %add712, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp43 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -403,7 +403,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp44 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins715 = insertelement <4 x float> %tmp44, float undef, i32 0
+ %vecins715 = insertelement <4 x float> %tmp44, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp45 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -415,19 +415,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins718 = insertelement <4 x float> %tmp46, float %add717, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins718, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins718, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp47 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext719 = extractelement <4 x float> %tmp47, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add723 = fadd float undef, 0xC06A6CCCC0000000
+ %add723 = fadd float %val, 0xC06A6CCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins724 = insertelement <4 x float> undef, float %add723, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add726 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext730 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -437,19 +437,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins732 = insertelement <4 x float> %tmp48, float %add731, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins732, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins732, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp49 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext733 = extractelement <4 x float> %tmp49, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp50 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins738 = insertelement <4 x float> %tmp50, float undef, i32 3
+ %vecins738 = insertelement <4 x float> %tmp50, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x406E6CCCC0000000, float 0xC07A766660000000, float 0xC0608CCCC0000000, float 0xC063333340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x406E6CCCC0000000, float 0xC07A766660000000, float 0xC0608CCCC0000000, float 0xC063333340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp51 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -465,7 +465,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins743 = insertelement <4 x float> %tmp53, float %add742, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins743, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins743, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp54 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -473,7 +473,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add754 = fadd <4 x float> %tmp55, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add754, <4 x float>* undef, align 16
+ store volatile <4 x float> %add754, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp56 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -485,7 +485,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins757 = insertelement <4 x float> %tmp57, float %add756, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add765 = fadd float undef, 0x405BA66660000000
+ %add765 = fadd float %val, 0x405BA66660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp58 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -501,11 +501,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins771 = insertelement <4 x float> %tmp60, float %add770, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins771, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins771, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp61 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add776 = fadd float undef, 0xC055F33340000000
+ %add776 = fadd float %val, 0xC055F33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins777 = insertelement <4 x float> undef, float %add776, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -515,7 +515,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add782 = fadd <4 x float> %tmp63, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add782, <4 x float>* undef, align 16
+ store volatile <4 x float> %add782, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp64 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -523,25 +523,25 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add784 = fadd float %vecext783, -3.455000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC07A866660000000, float 0xC05CF999A0000000, float 0xC0757199A0000000, float -3.845000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC07A866660000000, float 0xC05CF999A0000000, float 0xC0757199A0000000, float -3.845000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add796 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add796, <4 x float>* undef, align 16
+ store volatile <4 x float> %add796, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp65 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add801 = fadd float undef, 3.045000e+02
+ %add801 = fadd float %val, 3.045000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp66 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins802 = insertelement <4 x float> %tmp66, float %add801, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins802, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins802, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext803 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp67 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -549,7 +549,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add810 = fadd <4 x float> undef, %tmp68
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add810, <4 x float>* undef, align 16
+ store volatile <4 x float> %add810, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp69 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -557,17 +557,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp70 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins813 = insertelement <4 x float> %tmp70, float undef, i32 0
+ %vecins813 = insertelement <4 x float> %tmp70, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext817 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add818 = fadd float %vecext817, -4.830000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins822 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins822 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins822, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins822, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 2.700000e+01, float 0xC05F666660000000, float 0xC07D0199A0000000, float 0x407A6CCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 2.700000e+01, float 0xC05F666660000000, float 0xC07D0199A0000000, float 0x407A6CCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp71 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -577,17 +577,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add838 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add838, <4 x float>* undef, align 16
+ store volatile <4 x float> %add838, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp73 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext839 = extractelement <4 x float> %tmp73, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add849 = fadd float undef, 0xC07C266660000000
+ %add849 = fadd float %val, 0xC07C266660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC07D566660000000, float 0xC06D233340000000, float 0x4068B33340000000, float 0xC07ADCCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC07D566660000000, float 0xC06D233340000000, float 0x4068B33340000000, float 0xC07ADCCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp74 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -609,9 +609,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins861 = insertelement <4 x float> %tmp77, float %add860, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins889 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins889 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins889, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins889, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp78 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -623,9 +623,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins892 = insertelement <4 x float> %tmp79, float %add891, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins892, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins892, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4063D33340000000, float 0xC076433340000000, float 0x407C966660000000, float 0xC07B5199A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4063D33340000000, float 0xC076433340000000, float 0x407C966660000000, float 0xC07B5199A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp80 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -633,7 +633,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add894 = fadd <4 x float> %tmp81, %tmp80
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add894, <4 x float>* undef, align 16
+ store volatile <4 x float> %add894, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext895 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -659,7 +659,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins903 = insertelement <4 x float> %tmp84, float %add902, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins903, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins903, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext904 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -669,7 +669,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins906 = insertelement <4 x float> %tmp85, float %add905, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC07EFCCCC0000000, float 1.795000e+02, float 0x407E3E6660000000, float 0x4070633340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC07EFCCCC0000000, float 1.795000e+02, float 0x407E3E6660000000, float 0x4070633340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp86 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -677,13 +677,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add908 = fadd <4 x float> %tmp87, %tmp86
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add908, <4 x float>* undef, align 16
+ store volatile <4 x float> %add908, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp88 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp89 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp90 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -703,7 +703,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins917 = insertelement <4 x float> %tmp92, float %add916, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins917, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins917, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp93 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -715,17 +715,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins920 = insertelement <4 x float> %tmp94, float %add919, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins920, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins920, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp95 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins925 = insertelement <4 x float> %tmp95, float undef, i32 0
+ %vecins925 = insertelement <4 x float> %tmp95, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins925, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins925, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp96 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add927 = fadd float undef, 0xC0501999A0000000
+ %add927 = fadd float %val, 0xC0501999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp97 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -739,7 +739,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins931 = insertelement <4 x float> %tmp98, float %add930, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC047B33340000000, float 0x404ACCCCC0000000, float 0x40708E6660000000, float 0x4060F999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC047B33340000000, float 0x404ACCCCC0000000, float 0x40708E6660000000, float 0x4060F999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp99 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -747,11 +747,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext937 = extractelement <4 x float> %tmp100, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add941 = fadd float undef, -4.665000e+02
+ %add941 = fadd float %val, -4.665000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins942 = insertelement <4 x float> undef, float %add941, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins942, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins942, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp101 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -763,29 +763,29 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins945 = insertelement <4 x float> %tmp102, float %add944, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins945, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins945, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp103 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add947 = fadd float undef, 0xC051933340000000
+ %add947 = fadd float %val, 0xC051933340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp104 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins948 = insertelement <4 x float> %tmp104, float %add947, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins948, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins948, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4060CCCCC0000000, float 0xC07BAB3340000000, float 0xC061233340000000, float 0xC076C199A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4060CCCCC0000000, float 0xC07BAB3340000000, float 0xC061233340000000, float 0xC076C199A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp105 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add955 = fadd float undef, 0x4077F4CCC0000000
+ %add955 = fadd float %val, 0x4077F4CCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp106 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins956 = insertelement <4 x float> %tmp106, float %add955, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins956, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins956, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext971 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -795,17 +795,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins973 = insertelement <4 x float> %tmp107, float %add972, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins973, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins973, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp108 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext974 = extractelement <4 x float> %tmp108, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins976 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins976 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins976, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins976, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x407E266660000000, float -1.225000e+02, float 0x407EB199A0000000, float 0x407BA199A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x407E266660000000, float -1.225000e+02, float 0x407EB199A0000000, float 0x407BA199A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp109 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -817,7 +817,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp112 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext982 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -825,7 +825,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins984 = insertelement <4 x float> undef, float %add983, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins984, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins984, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp113 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -837,25 +837,25 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins987 = insertelement <4 x float> %tmp114, float %add986, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins987, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins987, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp115 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp116 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins995 = insertelement <4 x float> %tmp116, float undef, i32 0
+ %vecins995 = insertelement <4 x float> %tmp116, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins995, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins995, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp117 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add997 = fadd float undef, 0xC0798999A0000000
+ %add997 = fadd float %val, 0xC0798999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp118 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins998 = insertelement <4 x float> %tmp118, float %add997, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins998, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins998, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp119 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -865,7 +865,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp120 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp121 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -879,13 +879,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1031 = fadd float %vecext1030, 2.010000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp123 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp124 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1085 = insertelement <4 x float> %tmp124, float undef, i32 2
+ %vecins1085 = insertelement <4 x float> %tmp124, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp125 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -897,13 +897,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1088 = insertelement <4 x float> %tmp126, float %add1087, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1088, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1088, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp127 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1090 = fadd <4 x float> undef, %tmp127
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp128 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -915,7 +915,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1096 = insertelement <4 x float> %tmp129, float %add1095, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1096, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1096, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp130 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -927,7 +927,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1099 = insertelement <4 x float> %tmp131, float %add1098, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1099, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1099, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp132 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -939,9 +939,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1102 = insertelement <4 x float> %tmp133, float %add1101, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1102, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1102, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4059866660000000, float 0x4072466660000000, float 0xC078FE6660000000, float 0xC058ACCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4059866660000000, float 0x4072466660000000, float 0xC078FE6660000000, float 0xC058ACCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp134 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -961,9 +961,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp137 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1110 = insertelement <4 x float> %tmp137, float undef, i32 1
+ %vecins1110 = insertelement <4 x float> %tmp137, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1110, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1110, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp138 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -975,21 +975,21 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1113 = insertelement <4 x float> %tmp139, float %add1112, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1113, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1113, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1115 = fadd float undef, 0x4072B33340000000
+ %add1115 = fadd float %val, 0x4072B33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1116 = insertelement <4 x float> undef, float %add1115, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1116, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1116, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC0721999A0000000, float 0x4075633340000000, float 0x40794199A0000000, float 0x4061066660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC0721999A0000000, float 0x4075633340000000, float 0x40794199A0000000, float 0x4061066660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp140 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1118 = fadd <4 x float> %tmp140, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add1118, <4 x float>* undef, align 16
+ store volatile <4 x float> %add1118, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp141 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -999,7 +999,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1121 = insertelement <4 x float> undef, float %add1120, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1121, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1121, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp142 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1013,9 +1013,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1125 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1127 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins1127 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1127, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1127, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp144 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1027,7 +1027,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1130 = insertelement <4 x float> %tmp145, float %add1129, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC06D6CCCC0000000, float 0xC032E66660000000, float -1.005000e+02, float 0x40765B3340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC06D6CCCC0000000, float 0xC032E66660000000, float -1.005000e+02, float 0x40765B3340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp146 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1045,7 +1045,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1135 = insertelement <4 x float> %tmp149, float %add1134, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1135, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1135, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp150 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1053,13 +1053,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp151 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1138 = insertelement <4 x float> %tmp151, float undef, i32 1
+ %vecins1138 = insertelement <4 x float> %tmp151, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1138, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1138, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp152 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1140 = fadd float undef, 0x407AE999A0000000
+ %add1140 = fadd float %val, 0x407AE999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp153 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1073,7 +1073,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1144 = insertelement <4 x float> %tmp154, float %add1143, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1144, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1144, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp155 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1081,27 +1081,27 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1146 = fadd <4 x float> %tmp156, %tmp155
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add1146, <4 x float>* undef, align 16
+ store volatile <4 x float> %add1146, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp157 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1148 = fadd float undef, 4.145000e+02
+ %add1148 = fadd float %val, 4.145000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp158 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1158 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins1158 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1158, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1158, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40603999A0000000, float -9.150000e+01, float 0xC051E66660000000, float -4.825000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40603999A0000000, float -9.150000e+01, float 0xC051E66660000000, float -4.825000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1218 = fadd float undef, 0xC078733340000000
+ %add1218 = fadd float %val, 0xC078733340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1219 = insertelement <4 x float> undef, float %add1218, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC0655CCCC0000000, float -4.900000e+01, float -4.525000e+02, float 4.205000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC0655CCCC0000000, float -4.900000e+01, float -4.525000e+02, float 4.205000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp159 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1113,7 +1113,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1281 = insertelement <4 x float> %tmp160, float %add1280, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1281, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1281, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp161 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1125,7 +1125,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1284 = insertelement <4 x float> %tmp162, float %add1283, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1284, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1284, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp163 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1133,27 +1133,27 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1286 = fadd <4 x float> %tmp164, %tmp163
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add1286, <4 x float>* undef, align 16
+ store volatile <4 x float> %add1286, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp165 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1288 = fadd float undef, 0xC0731199A0000000
+ %add1288 = fadd float %val, 0xC0731199A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp166 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp167 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1444 = extractelement <4 x float> %tmp167, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1460 = insertelement <4 x float> undef, float undef, i32 1
+ %vecins1460 = insertelement <4 x float> undef, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1460, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1460, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp168 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1462 = fadd float undef, -1.670000e+02
+ %add1462 = fadd float %val, -1.670000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1463 = insertelement <4 x float> undef, float %add1462, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1167,9 +1167,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1466 = insertelement <4 x float> %tmp170, float %add1465, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1466, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1466, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 3.885000e+02, float 0x4054266660000000, float -9.500000e+01, float 8.500000e+01>, <4 x float>* undef
+ store volatile <4 x float> <float 3.885000e+02, float 0x4054266660000000, float -9.500000e+01, float 8.500000e+01>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp171 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1177,17 +1177,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1468 = fadd <4 x float> %tmp172, %tmp171
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add1468, <4 x float>* undef, align 16
+ store volatile <4 x float> %add1468, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp173 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1470 = fadd float undef, 0x4033B33340000000
+ %add1470 = fadd float %val, 0x4033B33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp174 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1471 = insertelement <4 x float> %tmp174, float %add1470, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1471, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1471, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp175 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1205,9 +1205,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp178 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1477 = insertelement <4 x float> %tmp178, float undef, i32 2
+ %vecins1477 = insertelement <4 x float> %tmp178, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1477, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1477, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp179 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1219,15 +1219,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1480 = insertelement <4 x float> %tmp180, float %add1479, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1480, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1480, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC061B33340000000, float 3.290000e+02, float 0xC067766660000000, float 0x407DB33340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC061B33340000000, float 3.290000e+02, float 0xC067766660000000, float 0x407DB33340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp181 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp182 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp183 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1241,9 +1241,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1486 = extractelement <4 x float> %tmp185, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1502 = insertelement <4 x float> undef, float undef, i32 1
+ %vecins1502 = insertelement <4 x float> undef, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1502, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1502, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1503 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1253,7 +1253,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1505 = insertelement <4 x float> %tmp186, float %add1504, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1505, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1505, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp187 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1265,9 +1265,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1508 = insertelement <4 x float> %tmp188, float %add1507, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1508, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1508, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40762B3340000000, float 0xC074566660000000, float 0xC07C74CCC0000000, float 0xC053F999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40762B3340000000, float 0xC074566660000000, float 0xC07C74CCC0000000, float 0xC053F999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp189 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1275,7 +1275,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1510 = fadd <4 x float> %tmp190, %tmp189
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add1510, <4 x float>* undef, align 16
+ store volatile <4 x float> %add1510, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp191 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1289,13 +1289,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1656 = insertelement <4 x float> %tmp193, float %add1655, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1656, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1656, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1658 = fadd float undef, 0x40709999A0000000
+ %add1658 = fadd float %val, 0x40709999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp194 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1660 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1305,19 +1305,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1662 = insertelement <4 x float> %tmp195, float %add1661, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1662, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1662, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC075266660000000, float 0xC072C4CCC0000000, float 0x407C4E6660000000, float -4.485000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC075266660000000, float 0xC072C4CCC0000000, float 0x407C4E6660000000, float -4.485000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1676 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins1676 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp196 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1692 = fadd <4 x float> %tmp196, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add1692, <4 x float>* undef, align 16
+ store volatile <4 x float> %add1692, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp197 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1329,7 +1329,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1695 = insertelement <4 x float> %tmp198, float %add1694, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1695, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1695, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp199 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1341,7 +1341,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1698 = insertelement <4 x float> %tmp200, float %add1697, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1698, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1698, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp201 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1349,15 +1349,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp202 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1701 = insertelement <4 x float> %tmp202, float undef, i32 2
+ %vecins1701 = insertelement <4 x float> %tmp202, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1701, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1701, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp203 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1704 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins1704 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC075933340000000, float 0xC0489999A0000000, float 0xC078AB3340000000, float 0x406DFCCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC075933340000000, float 0xC0489999A0000000, float 0xC078AB3340000000, float 0x406DFCCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp204 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1365,9 +1365,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp206 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1709 = insertelement <4 x float> %tmp206, float undef, i32 0
+ %vecins1709 = insertelement <4 x float> %tmp206, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1709, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1709, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp207 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1375,11 +1375,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1714 = fadd float %vecext1713, 0xC0703199A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1723 = insertelement <4 x float> undef, float undef, i32 0
+ %vecins1723 = insertelement <4 x float> undef, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp208 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1730 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1389,9 +1389,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1732 = insertelement <4 x float> %tmp209, float %add1731, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1732, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1732, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40551999A0000000, float 0xC0708999A0000000, float 0xC054F33340000000, float 0xC07C5999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40551999A0000000, float 0xC0708999A0000000, float 0xC054F33340000000, float 0xC07C5999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp210 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1399,7 +1399,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp211 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1736 = fadd float undef, 0x407C3999A0000000
+ %add1736 = fadd float %val, 0x407C3999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp212 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1415,7 +1415,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1740 = insertelement <4 x float> %tmp214, float %add1739, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1740, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1740, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp215 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1427,25 +1427,25 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1743 = insertelement <4 x float> %tmp216, float %add1742, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1743, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1743, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1744 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp217 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1746 = insertelement <4 x float> %tmp217, float undef, i32 3
+ %vecins1746 = insertelement <4 x float> %tmp217, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC076466660000000, float 0x4060BCCCC0000000, float 0x405EF999A0000000, float 0x4074766660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC076466660000000, float 0x4060BCCCC0000000, float 0x405EF999A0000000, float 0x4074766660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp218 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1748 = fadd <4 x float> undef, %tmp218
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add1748, <4 x float>* undef, align 16
+ store volatile <4 x float> %add1748, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp219 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1750 = fadd float undef, 0x407C6B3340000000
+ %add1750 = fadd float %val, 0x407C6B3340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1751 = insertelement <4 x float> undef, float %add1750, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1467,21 +1467,21 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp223 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1759 = fadd float undef, 0x40678999A0000000
+ %add1759 = fadd float %val, 0x40678999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp224 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1760 = insertelement <4 x float> %tmp224, float %add1759, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1760, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1760, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x405E333340000000, float 0x40571999A0000000, float 0xC02E333340000000, float 0x4053A66660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x405E333340000000, float 0x40571999A0000000, float 0xC02E333340000000, float 0x4053A66660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp225 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1762 = fadd <4 x float> undef, %tmp225
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add1762, <4 x float>* undef, align 16
+ store volatile <4 x float> %add1762, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp226 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1493,7 +1493,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1765 = insertelement <4 x float> %tmp227, float %add1764, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1765, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1765, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp228 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1505,7 +1505,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1768 = insertelement <4 x float> %tmp229, float %add1767, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1768, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1768, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1769 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1515,7 +1515,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1771 = insertelement <4 x float> %tmp230, float %add1770, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1771, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1771, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp231 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1525,13 +1525,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp234 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1779 = insertelement <4 x float> %tmp234, float undef, i32 0
+ %vecins1779 = insertelement <4 x float> %tmp234, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1779, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1779, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp235 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp236 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1541,9 +1541,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1785 = insertelement <4 x float> undef, float %add1784, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1785, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1785, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC07074CCC0000000, float 0xC04D666660000000, float 3.235000e+02, float 0xC0724199A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC07074CCC0000000, float 0xC04D666660000000, float 3.235000e+02, float 0xC0724199A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp237 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1559,25 +1559,25 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1793 = insertelement <4 x float> %tmp239, float %add1792, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1793, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1793, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp240 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1795 = fadd float undef, 0x4055266660000000
+ %add1795 = fadd float %val, 0x4055266660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp241 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1796 = insertelement <4 x float> %tmp241, float %add1795, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1799 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins1799 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1800 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp242 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float -6.600000e+01, float 0xC07B2199A0000000, float 0x4011333340000000, float 0xC0635CCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float -6.600000e+01, float 0xC07B2199A0000000, float 0x4011333340000000, float 0xC0635CCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp243 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1587,7 +1587,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp246 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1865 = fadd float undef, -2.235000e+02
+ %add1865 = fadd float %val, -2.235000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp247 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1597,33 +1597,33 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp249 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1872 = insertelement <4 x float> %tmp249, float undef, i32 3
+ %vecins1872 = insertelement <4 x float> %tmp249, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x406B8999A0000000, float 0xC0696CCCC0000000, float 0xC07A34CCC0000000, float 0x407654CCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x406B8999A0000000, float 0xC0696CCCC0000000, float 0xC07A34CCC0000000, float 0x407654CCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp250 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1874 = fadd <4 x float> %tmp250, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add1874, <4 x float>* undef, align 16
+ store volatile <4 x float> %add1874, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1875 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp251 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1894 = insertelement <4 x float> %tmp251, float undef, i32 1
+ %vecins1894 = insertelement <4 x float> %tmp251, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp252 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1895 = extractelement <4 x float> %tmp252, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1900 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins1900 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1900, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1900, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1905 = insertelement <4 x float> undef, float undef, i32 0
+ %vecins1905 = insertelement <4 x float> undef, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1905, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1905, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp253 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1633,7 +1633,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1908 = insertelement <4 x float> undef, float %add1907, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1908, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1908, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1909 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1649,23 +1649,23 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1916 = fadd <4 x float> %tmp256, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add1916, <4 x float>* undef, align 16
+ store volatile <4 x float> %add1916, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1923 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp257 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add1927 = fadd float undef, 0x40761999A0000000
+ %add1927 = fadd float %val, 0x40761999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp258 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1928 = insertelement <4 x float> %tmp258, float %add1927, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1928, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1928, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 7.100000e+01, float 0xC0634999A0000000, float 0x407B0B3340000000, float 0xC07DE999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 7.100000e+01, float 0xC0634999A0000000, float 0x407B0B3340000000, float 0xC07DE999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp259 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1677,9 +1677,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp262 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1933 = insertelement <4 x float> %tmp262, float undef, i32 0
+ %vecins1933 = insertelement <4 x float> %tmp262, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1933, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1933, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp263 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1693,15 +1693,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1940 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1942 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins1942 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float -8.200000e+01, float 0xC04C733340000000, float 0xC077ACCCC0000000, float 0x4074566660000000>, <4 x float>* undef
+ store volatile <4 x float> <float -8.200000e+01, float 0xC04C733340000000, float 0xC077ACCCC0000000, float 0x4074566660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp265 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp266 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp267 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1709,13 +1709,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add1946 = fadd float %vecext1945, 0xC074866660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1953 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins1953 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1953, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1953, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp268 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp269 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1737,15 +1737,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1964 = insertelement <4 x float> %tmp272, float %add1963, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1964, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1964, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1965 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp273 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1967 = insertelement <4 x float> %tmp273, float undef, i32 2
+ %vecins1967 = insertelement <4 x float> %tmp273, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1967, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1967, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp274 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1757,9 +1757,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1970 = insertelement <4 x float> %tmp275, float %add1969, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1970, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1970, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x402E9999A0000000, float 0x407344CCC0000000, float -4.165000e+02, float 0x4078FCCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x402E9999A0000000, float 0x407344CCC0000000, float -4.165000e+02, float 0x4078FCCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp276 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1767,31 +1767,31 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp278 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1975 = insertelement <4 x float> %tmp278, float undef, i32 0
+ %vecins1975 = insertelement <4 x float> %tmp278, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1975, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1975, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp279 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1976 = extractelement <4 x float> %tmp279, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1978 = insertelement <4 x float> undef, float undef, i32 1
+ %vecins1978 = insertelement <4 x float> undef, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1978, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1978, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1979 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1981 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins1981 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1981, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1981, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins1984 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins1984 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1984, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1984, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC06A766660000000, float 0xC07CE4CCC0000000, float -1.055000e+02, float 0x40786E6660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC06A766660000000, float 0xC07CE4CCC0000000, float -1.055000e+02, float 0x40786E6660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext1990 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1803,11 +1803,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins1998 = insertelement <4 x float> %tmp280, float %add1997, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins1998, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins1998, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC0794E6660000000, float 0xC073CCCCC0000000, float 0x407994CCC0000000, float 6.500000e+01>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC0794E6660000000, float 0xC073CCCCC0000000, float 0x407994CCC0000000, float 6.500000e+01>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2004 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1817,7 +1817,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2006 = insertelement <4 x float> %tmp281, float %add2005, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2006, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2006, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp282 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1825,7 +1825,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp283 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins2009 = insertelement <4 x float> %tmp283, float undef, i32 2
+ %vecins2009 = insertelement <4 x float> %tmp283, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp284 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1837,15 +1837,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2012 = insertelement <4 x float> %tmp285, float %add2011, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2012, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2012, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC04E733340000000, float 0xC074566660000000, float 0x4079F66660000000, float 0xC0705B3340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC04E733340000000, float 0xC074566660000000, float 0x4079F66660000000, float 0xC0705B3340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp286 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp287 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp288 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1857,7 +1857,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2017 = insertelement <4 x float> %tmp289, float %add2016, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add2022 = fadd float undef, 8.350000e+01
+ %add2022 = fadd float %val, 8.350000e+01
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp290 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1871,7 +1871,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add2028 = fadd <4 x float> %tmp292, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add2028, <4 x float>* undef, align 16
+ store volatile <4 x float> %add2028, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2029 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1879,11 +1879,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp293 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp294 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add2036 = fadd float undef, 0x407DE66660000000
+ %add2036 = fadd float %val, 0x407DE66660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp295 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1895,9 +1895,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp299 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins2045 = insertelement <4 x float> %tmp299, float undef, i32 0
+ %vecins2045 = insertelement <4 x float> %tmp299, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2045, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2045, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp300 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1905,35 +1905,35 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add2047 = fadd float %vecext2046, 0xC065433340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2052 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp301 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins2054 = insertelement <4 x float> %tmp301, float undef, i32 3
+ %vecins2054 = insertelement <4 x float> %tmp301, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2054, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2054, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4024666660000000, float 0x4079366660000000, float 0x40721B3340000000, float 0x406E533340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4024666660000000, float 0x4079366660000000, float 0x40721B3340000000, float 0x406E533340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp302 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add2056 = fadd <4 x float> undef, %tmp302
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add2056, <4 x float>* undef, align 16
+ store volatile <4 x float> %add2056, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp303 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp304 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins2062 = insertelement <4 x float> %tmp304, float undef, i32 1
+ %vecins2062 = insertelement <4 x float> %tmp304, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2062, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2062, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp305 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp306 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1943,9 +1943,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2068 = insertelement <4 x float> undef, float %add2067, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2068, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2068, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC07EFCCCC0000000, float -3.420000e+02, float 0xC07BC999A0000000, float 0x40751999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC07EFCCCC0000000, float -3.420000e+02, float 0xC07BC999A0000000, float 0x40751999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp307 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1953,7 +1953,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add2070 = fadd <4 x float> %tmp308, %tmp307
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add2070, <4 x float>* undef, align 16
+ store volatile <4 x float> %add2070, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp309 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1965,7 +1965,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2073 = insertelement <4 x float> %tmp310, float %add2072, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2073, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2073, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp311 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1973,7 +1973,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp312 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins2076 = insertelement <4 x float> %tmp312, float undef, i32 1
+ %vecins2076 = insertelement <4 x float> %tmp312, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp313 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1985,7 +1985,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2079 = insertelement <4 x float> %tmp314, float %add2078, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2079, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2079, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp315 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1997,15 +1997,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2082 = insertelement <4 x float> %tmp316, float %add2081, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2082, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2082, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40764E6660000000, float 0x40501999A0000000, float 0xC079A4CCC0000000, float 0x4050533340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40764E6660000000, float 0x40501999A0000000, float 0xC079A4CCC0000000, float 0x4050533340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp317 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp318 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp319 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2015,7 +2015,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2087 = insertelement <4 x float> undef, float %add2086, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2087, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2087, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2480 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2029,23 +2029,23 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2485 = insertelement <4 x float> %tmp320, float %add2484, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2485, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2485, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp321 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add2487 = fadd float undef, 2.030000e+02
+ %add2487 = fadd float %val, 2.030000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp322 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4073DE6660000000, float 0x4067CCCCC0000000, float 0xC03F1999A0000000, float 4.350000e+01>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4073DE6660000000, float 0x4067CCCCC0000000, float 0xC03F1999A0000000, float 4.350000e+01>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2491 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp323 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp324 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2055,9 +2055,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp325 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins2499 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins2499 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2499, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2499, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2500 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2079,7 +2079,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp329 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add2534 = fadd float undef, 0x4072C66660000000
+ %add2534 = fadd float %val, 0x4072C66660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2536 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2089,15 +2089,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2538 = insertelement <4 x float> %tmp330, float %add2537, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2538, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2538, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2539 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add2540 = fadd float %vecext2539, 0x406F9999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins2580 = insertelement <4 x float> undef, float undef, i32 1
+ %vecins2580 = insertelement <4 x float> undef, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2580, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2580, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp331 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2107,7 +2107,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2583 = insertelement <4 x float> undef, float %add2582, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2583, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2583, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2584 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2115,21 +2115,21 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp332 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40773199A0000000, float 0x407D7999A0000000, float 0xC0717199A0000000, float 0xC07E9CCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40773199A0000000, float 0x407D7999A0000000, float 0xC0717199A0000000, float 0xC07E9CCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add2590 = fadd float undef, 0x407B1999A0000000
+ %add2590 = fadd float %val, 0x407B1999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp333 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp334 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add2672 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add2672, <4 x float>* undef, align 16
+ store volatile <4 x float> %add2672, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp335 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2141,37 +2141,37 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2678 = insertelement <4 x float> %tmp336, float %add2677, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2678, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2678, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp337 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2679 = extractelement <4 x float> %tmp337, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins2681 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins2681 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2681, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2681, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp338 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext2682 = extractelement <4 x float> %tmp338, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins2684 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins2684 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp339 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp340 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp341 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add2688 = fadd float undef, 0x4063266660000000
+ %add2688 = fadd float %val, 0x4063266660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins2692 = insertelement <4 x float> undef, float undef, i32 1
+ %vecins2692 = insertelement <4 x float> undef, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2692, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2692, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp342 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2183,9 +2183,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins2698 = insertelement <4 x float> %tmp343, float %add2697, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins2698, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins2698, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40547999A0000000, float 0xC060633340000000, float 0x4075766660000000, float 0x4072D33340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40547999A0000000, float 0xC060633340000000, float 0x4075766660000000, float 0x4072D33340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp344 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2193,7 +2193,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add2700 = fadd <4 x float> %tmp345, %tmp344
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add2700, <4 x float>* undef, align 16
+ store volatile <4 x float> %add2700, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp346 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2207,15 +2207,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp349 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3121 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3125 = fadd float undef, 0xC06F266660000000
+ %add3125 = fadd float %val, 0xC06F266660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3126 = insertelement <4 x float> undef, float %add3125, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3126, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3126, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp350 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2227,11 +2227,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3129 = insertelement <4 x float> %tmp351, float %add3128, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3129, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3129, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp352 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3131 = fadd float undef, 3.215000e+02
+ %add3131 = fadd float %val, 3.215000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp353 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2239,15 +2239,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add3134 = fadd <4 x float> %tmp354, %tmp353
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add3134, <4 x float>* undef, align 16
+ store volatile <4 x float> %add3134, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp355 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3136 = fadd float undef, 0x4074333340000000
+ %add3136 = fadd float %val, 0x4074333340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins3140 = insertelement <4 x float> undef, float undef, i32 1
+ %vecins3140 = insertelement <4 x float> undef, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3140, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3140, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp356 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2259,7 +2259,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3143 = insertelement <4 x float> %tmp357, float %add3142, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3143, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3143, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp358 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2271,15 +2271,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3146 = insertelement <4 x float> %tmp359, float %add3145, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3146, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3146, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp360 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins3272 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins3272 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3272, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3272, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x407B4999A0000000, float 0x40695CCCC0000000, float 0xC05C0CCCC0000000, float 0x407EB33340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x407B4999A0000000, float 0x40695CCCC0000000, float 0xC05C0CCCC0000000, float 0x407EB33340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp361 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2287,7 +2287,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add3274 = fadd <4 x float> %tmp362, %tmp361
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add3274, <4 x float>* undef, align 16
+ store volatile <4 x float> %add3274, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp363 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2299,7 +2299,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3277 = insertelement <4 x float> %tmp364, float %add3276, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3277, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3277, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp365 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2309,7 +2309,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3280 = insertelement <4 x float> undef, float %add3279, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3280, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3280, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp366 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2321,7 +2321,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3283 = insertelement <4 x float> %tmp367, float %add3282, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3283, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3283, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp368 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2333,7 +2333,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp369 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp370 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2345,7 +2345,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3291 = insertelement <4 x float> %tmp371, float %add3290, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3291, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3291, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3292 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2353,11 +2353,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp373 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins3328 = insertelement <4 x float> %tmp373, float undef, i32 3
+ %vecins3328 = insertelement <4 x float> %tmp373, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add3330 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add3330, <4 x float>* undef, align 16
+ store volatile <4 x float> %add3330, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3331 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2367,7 +2367,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3333 = insertelement <4 x float> %tmp374, float %add3332, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3333, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3333, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3334 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2385,7 +2385,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3339 = insertelement <4 x float> %tmp376, float %add3338, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3339, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3339, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp377 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2393,13 +2393,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp378 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins3342 = insertelement <4 x float> %tmp378, float undef, i32 3
+ %vecins3342 = insertelement <4 x float> %tmp378, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp379 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add3344 = fadd <4 x float> %tmp379, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add3344, <4 x float>* undef, align 16
+ store volatile <4 x float> %add3344, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp380 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2419,15 +2419,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3350 = insertelement <4 x float> %tmp382, float %add3349, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3350, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3350, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3352 = fadd float undef, 0xC06ACCCCC0000000
+ %add3352 = fadd float %val, 0xC06ACCCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp383 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins3423 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins3423 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3423, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3423, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3424 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2437,9 +2437,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3426 = insertelement <4 x float> %tmp384, float %add3425, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3426, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3426, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 2.795000e+02, float -4.065000e+02, float 0xC05CD999A0000000, float 1.825000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 2.795000e+02, float -4.065000e+02, float 0xC05CD999A0000000, float 1.825000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp385 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2457,7 +2457,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3431 = insertelement <4 x float> %tmp388, float %add3430, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3431, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3431, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp389 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2469,15 +2469,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3434 = insertelement <4 x float> %tmp390, float %add3433, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3434, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3434, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3435 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp391 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins3437 = insertelement <4 x float> %tmp391, float undef, i32 2
+ %vecins3437 = insertelement <4 x float> %tmp391, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3437, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3437, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp392 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2485,7 +2485,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add3439 = fadd float %vecext3438, 0xC071D999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC0798199A0000000, float -3.385000e+02, float 0xC050066660000000, float 0xC075E999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC0798199A0000000, float -3.385000e+02, float 0xC050066660000000, float 0xC075E999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp393 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2493,7 +2493,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add3442 = fadd <4 x float> %tmp394, %tmp393
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add3442, <4 x float>* undef, align 16
+ store volatile <4 x float> %add3442, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3443 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2509,7 +2509,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3448 = insertelement <4 x float> %tmp396, float %add3447, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3448, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3448, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp397 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2521,15 +2521,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3451 = insertelement <4 x float> %tmp398, float %add3450, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3451, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3451, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3453 = fadd float undef, 0xC07ADCCCC0000000
+ %add3453 = fadd float %val, 0xC07ADCCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp399 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3454 = insertelement <4 x float> %tmp399, float %add3453, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3454, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3454, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp400 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2539,7 +2539,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3459 = insertelement <4 x float> undef, float %add3458, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3459, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3459, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp401 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2547,19 +2547,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp402 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins3462 = insertelement <4 x float> %tmp402, float undef, i32 1
+ %vecins3462 = insertelement <4 x float> %tmp402, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3462, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3462, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp403 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3464 = fadd float undef, 0xC057B999A0000000
+ %add3464 = fadd float %val, 0xC057B999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp404 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3465 = insertelement <4 x float> %tmp404, float %add3464, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3465, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3465, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp405 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2569,21 +2569,21 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp406 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x405C3999A0000000, float 0xC07C6B3340000000, float 0x407ACB3340000000, float 0xC06E0999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x405C3999A0000000, float 0xC07C6B3340000000, float 0x407ACB3340000000, float 0xC06E0999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp407 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp408 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3477 = extractelement <4 x float> %tmp408, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins3479 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins3479 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3479, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3479, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3480 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2593,23 +2593,23 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3482 = insertelement <4 x float> %tmp409, float %add3481, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3482, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3482, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 3.565000e+02, float 0xC0464CCCC0000000, float 0x4037666660000000, float 0xC0788CCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 3.565000e+02, float 0xC0464CCCC0000000, float 0x4037666660000000, float 0xC0788CCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp410 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add3484 = fadd <4 x float> %tmp410, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add3484, <4 x float>* undef, align 16
+ store volatile <4 x float> %add3484, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp411 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3486 = fadd float undef, -1.415000e+02
+ %add3486 = fadd float %val, -1.415000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3487 = insertelement <4 x float> undef, float %add3486, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3487, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3487, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp412 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2621,25 +2621,25 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3490 = insertelement <4 x float> %tmp413, float %add3489, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3490, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3490, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3492 = fadd float undef, 0x4078066660000000
+ %add3492 = fadd float %val, 0x4078066660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp414 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3493 = insertelement <4 x float> %tmp414, float %add3492, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3493, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3493, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp415 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3495 = fadd float undef, 0xC0798999A0000000
+ %add3495 = fadd float %val, 0xC0798999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp416 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3496 = insertelement <4 x float> %tmp416, float %add3495, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3496, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3496, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp417 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2647,7 +2647,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add3498 = fadd <4 x float> %tmp418, %tmp417
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add3498, <4 x float>* undef, align 16
+ store volatile <4 x float> %add3498, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3499 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2663,25 +2663,25 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp420 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3506 = fadd float undef, 0xC074DB3340000000
+ %add3506 = fadd float %val, 0xC074DB3340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp421 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins3507 = insertelement <4 x float> %tmp421, float %add3506, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins3507, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins3507, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add3509 = fadd float undef, 0xC066033340000000
+ %add3509 = fadd float %val, 0xC066033340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp422 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x404B333340000000, float 4.680000e+02, float 0x40577999A0000000, float 0xC07D9999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x404B333340000000, float 4.680000e+02, float 0x40577999A0000000, float 0xC07D9999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp423 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3513 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2693,9 +2693,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext3516 = extractelement <4 x float> %tmp425, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5414 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins5414 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5414, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5414, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp426 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2703,33 +2703,33 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5416 = fadd <4 x float> %tmp427, %tmp426
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add5416, <4 x float>* undef, align 16
+ store volatile <4 x float> %add5416, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp428 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add5418 = fadd float undef, 0xC07ED999A0000000
+ %add5418 = fadd float %val, 0xC07ED999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp429 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5419 = insertelement <4 x float> %tmp429, float %add5418, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5624 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins5624 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5624, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5624, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC07B4999A0000000, float 0x4078B33340000000, float 0xC07674CCC0000000, float 0xC07C533340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC07B4999A0000000, float 0x4078B33340000000, float 0xC07674CCC0000000, float 0xC07C533340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5626 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add5626, <4 x float>* undef, align 16
+ store volatile <4 x float> %add5626, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext5627 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp430 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5629 = insertelement <4 x float> %tmp430, float undef, i32 0
+ %vecins5629 = insertelement <4 x float> %tmp430, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5629, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5629, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp431 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2739,13 +2739,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5632 = insertelement <4 x float> undef, float %add5631, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5632, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5632, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp432 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5688 = insertelement <4 x float> %tmp432, float undef, i32 1
+ %vecins5688 = insertelement <4 x float> %tmp432, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5688, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5688, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp433 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2753,35 +2753,35 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp434 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5691 = insertelement <4 x float> %tmp434, float undef, i32 2
+ %vecins5691 = insertelement <4 x float> %tmp434, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5691, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5691, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext5692 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float -4.350000e+02, float 0xC0775CCCC0000000, float 0xC0714999A0000000, float 0xC0661999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float -4.350000e+02, float 0xC0775CCCC0000000, float 0xC0714999A0000000, float 0xC0661999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp435 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5696 = fadd <4 x float> undef, %tmp435
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add5696, <4 x float>* undef, align 16
+ store volatile <4 x float> %add5696, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add5701 = fadd float undef, 0x4077D4CCC0000000
+ %add5701 = fadd float %val, 0x4077D4CCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp436 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5702 = insertelement <4 x float> %tmp436, float %add5701, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5702, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5702, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp437 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp438 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5705 = insertelement <4 x float> %tmp438, float undef, i32 2
+ %vecins5705 = insertelement <4 x float> %tmp438, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5705, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5705, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp439 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2793,9 +2793,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5708 = insertelement <4 x float> %tmp440, float %add5707, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5708, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5708, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x405D666660000000, float 0xC069333340000000, float 0x407B6B3340000000, float 0xC06EB33340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x405D666660000000, float 0xC069333340000000, float 0x407B6B3340000000, float 0xC06EB33340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp441 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2803,7 +2803,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5710 = fadd <4 x float> %tmp442, %tmp441
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add5710, <4 x float>* undef, align 16
+ store volatile <4 x float> %add5710, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp443 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2815,19 +2815,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5713 = insertelement <4 x float> %tmp444, float %add5712, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5713, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5713, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp445 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp446 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5716 = insertelement <4 x float> %tmp446, float undef, i32 1
+ %vecins5716 = insertelement <4 x float> %tmp446, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp447 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5724 = fadd <4 x float> %tmp447, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add5724, <4 x float>* undef, align 16
+ store volatile <4 x float> %add5724, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp448 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2835,21 +2835,21 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp449 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5750 = insertelement <4 x float> %tmp449, float undef, i32 3
+ %vecins5750 = insertelement <4 x float> %tmp449, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40692999A0000000, float 0xC07C4CCCC0000000, float 0x407D1E6660000000, float 0x407B4199A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40692999A0000000, float 0xC07C4CCCC0000000, float 0x407D1E6660000000, float 0x407B4199A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp450 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5752 = fadd <4 x float> undef, %tmp450
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add5754 = fadd float undef, 0xC064033340000000
+ %add5754 = fadd float %val, 0xC064033340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp451 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5755 = insertelement <4 x float> %tmp451, float %add5754, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5755, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5755, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp452 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2861,7 +2861,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5758 = insertelement <4 x float> %tmp453, float %add5757, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5758, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5758, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp454 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2869,9 +2869,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp455 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5761 = insertelement <4 x float> %tmp455, float undef, i32 2
+ %vecins5761 = insertelement <4 x float> %tmp455, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5761, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5761, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp456 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2883,13 +2883,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5764 = insertelement <4 x float> %tmp457, float %add5763, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5764, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5764, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x407A6B3340000000, float 0x40470CCCC0000000, float 0xC076F4CCC0000000, float 0x40791999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x407A6B3340000000, float 0x40470CCCC0000000, float 0xC076F4CCC0000000, float 0x40791999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5766 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add5766, <4 x float>* undef, align 16
+ store volatile <4 x float> %add5766, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp458 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2901,9 +2901,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5769 = insertelement <4 x float> %tmp459, float %add5768, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5769, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5769, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add5771 = fadd float undef, 8.000000e+00
+ %add5771 = fadd float %val, 8.000000e+00
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp460 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2911,11 +2911,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp461 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add5796 = fadd float undef, 0x4058ECCCC0000000
+ %add5796 = fadd float %val, 0x4058ECCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5797 = insertelement <4 x float> undef, float %add5796, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5797, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5797, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp462 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2923,7 +2923,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp463 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5800 = insertelement <4 x float> %tmp463, float undef, i32 1
+ %vecins5800 = insertelement <4 x float> %tmp463, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp464 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2935,7 +2935,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5803 = insertelement <4 x float> %tmp465, float %add5802, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5803, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5803, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp466 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2947,11 +2947,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5806 = insertelement <4 x float> %tmp467, float %add5805, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5806, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5806, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp468 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp469 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2961,7 +2961,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp470 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp471 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2973,9 +2973,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5820 = insertelement <4 x float> %tmp472, float %add5819, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5820, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5820, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40514CCCC0000000, float 0x406A7999A0000000, float 0xC078766660000000, float 0xC0522CCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40514CCCC0000000, float 0x406A7999A0000000, float 0xC078766660000000, float 0xC0522CCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp473 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2983,7 +2983,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5822 = fadd <4 x float> %tmp474, %tmp473
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add5822, <4 x float>* undef, align 16
+ store volatile <4 x float> %add5822, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp475 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2991,7 +2991,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp476 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5825 = insertelement <4 x float> %tmp476, float undef, i32 0
+ %vecins5825 = insertelement <4 x float> %tmp476, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp477 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3003,7 +3003,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5828 = insertelement <4 x float> %tmp478, float %add5827, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5828, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5828, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp479 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3015,19 +3015,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5831 = insertelement <4 x float> %tmp480, float %add5830, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float -3.370000e+02, float 0xC072DE6660000000, float -2.670000e+02, float 0x4062333340000000>, <4 x float>* undef
+ store volatile <4 x float> <float -3.370000e+02, float 0xC072DE6660000000, float -2.670000e+02, float 0x4062333340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp481 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext5837 = extractelement <4 x float> %tmp481, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5839 = insertelement <4 x float> undef, float undef, i32 0
+ %vecins5839 = insertelement <4 x float> undef, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5839, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5839, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp482 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3035,33 +3035,33 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp483 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5842 = insertelement <4 x float> %tmp483, float undef, i32 1
+ %vecins5842 = insertelement <4 x float> %tmp483, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5842, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5842, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp484 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp485 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5845 = insertelement <4 x float> %tmp485, float undef, i32 2
+ %vecins5845 = insertelement <4 x float> %tmp485, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5845, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5845, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC06EC999A0000000, float 0x406D5999A0000000, float 0x4056F33340000000, float 0xC07E14CCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC06EC999A0000000, float 0x406D5999A0000000, float 0x4056F33340000000, float 0xC07E14CCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5850 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add5850, <4 x float>* undef, align 16
+ store volatile <4 x float> %add5850, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp486 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add5852 = fadd float undef, 2.985000e+02
+ %add5852 = fadd float %val, 2.985000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp487 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5853 = insertelement <4 x float> %tmp487, float %add5852, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5853, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5853, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp488 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3073,17 +3073,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5856 = insertelement <4 x float> %tmp489, float %add5855, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5856, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5856, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp490 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add5858 = fadd float undef, 0x4071666660000000
+ %add5858 = fadd float %val, 0x4071666660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp491 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5859 = insertelement <4 x float> %tmp491, float %add5858, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5859, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5859, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp492 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3099,19 +3099,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5901 = insertelement <4 x float> %tmp494, float %add5900, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5901, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5901, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add5914 = fadd float undef, 0x40786E6660000000
+ %add5914 = fadd float %val, 0x40786E6660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins5918 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins5918 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5918, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5918, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x406F266660000000, float 7.900000e+01, float -4.695000e+02, float -4.880000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0x406F266660000000, float 7.900000e+01, float -4.695000e+02, float -4.880000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5920 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add5920, <4 x float>* undef, align 16
+ store volatile <4 x float> %add5920, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add5934 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3121,7 +3121,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp495 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp496 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3131,13 +3131,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins5996 = insertelement <4 x float> undef, float %add5995, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins5996, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins5996, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp497 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext5997 = extractelement <4 x float> %tmp497, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp498 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3149,15 +3149,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6002 = insertelement <4 x float> %tmp499, float %add6001, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6002, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6002, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC07EA199A0000000, float 0x407DC33340000000, float 0xC0753199A0000000, float -3.895000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC07EA199A0000000, float 0x407DC33340000000, float 0xC0753199A0000000, float -3.895000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp500 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6004 = fadd <4 x float> undef, %tmp500
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6004, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6004, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp501 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3165,7 +3165,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp502 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6007 = insertelement <4 x float> %tmp502, float undef, i32 0
+ %vecins6007 = insertelement <4 x float> %tmp502, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp503 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3173,9 +3173,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp504 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6024 = insertelement <4 x float> %tmp504, float undef, i32 1
+ %vecins6024 = insertelement <4 x float> %tmp504, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6024, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6024, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp505 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3187,7 +3187,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6027 = insertelement <4 x float> %tmp506, float %add6026, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6027, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6027, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6028 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3197,15 +3197,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6030 = insertelement <4 x float> %tmp507, float %add6029, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6030, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6030, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC0527999A0000000, float 0xC06AD999A0000000, float 0x3FF6666660000000, float 0xC03F666660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC0527999A0000000, float 0xC06AD999A0000000, float 0x3FF6666660000000, float 0xC03F666660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp508 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp509 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp510 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3213,7 +3213,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp511 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6036 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3221,17 +3221,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6038 = insertelement <4 x float> undef, float %add6037, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6038, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6038, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp512 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6040 = fadd float undef, 0x4071ECCCC0000000
+ %add6040 = fadd float %val, 0x4071ECCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp513 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6041 = insertelement <4 x float> %tmp513, float %add6040, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6041, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6041, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp514 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3243,9 +3243,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6044 = insertelement <4 x float> %tmp515, float %add6043, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6044, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6044, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC065FCCCC0000000, float 0x40767CCCC0000000, float 0x4079D4CCC0000000, float 0xC07314CCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC065FCCCC0000000, float 0x40767CCCC0000000, float 0x4079D4CCC0000000, float 0xC07314CCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp516 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3253,15 +3253,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6046 = fadd <4 x float> %tmp517, %tmp516
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6046, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6046, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6047 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp518 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6049 = insertelement <4 x float> %tmp518, float undef, i32 0
+ %vecins6049 = insertelement <4 x float> %tmp518, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6049, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6049, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp519 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3269,19 +3269,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6051 = fadd float %vecext6050, 0x407E4E6660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6055 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins6055 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6056 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp520 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6061 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp521 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp522 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3295,9 +3295,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6072 = insertelement <4 x float> undef, float %add6071, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6072, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6072, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40546CCCC0000000, float 0x4067D66660000000, float 0xC060E33340000000, float 0x4061533340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40546CCCC0000000, float 0x4067D66660000000, float 0xC060E33340000000, float 0x4061533340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp523 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3305,7 +3305,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6074 = fadd <4 x float> %tmp524, %tmp523
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6074, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6074, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp525 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3317,23 +3317,23 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6077 = insertelement <4 x float> %tmp526, float %add6076, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6077, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6077, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp527 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6079 = fadd float undef, 0xC07E9B3340000000
+ %add6079 = fadd float %val, 0xC07E9B3340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp528 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp529 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6082 = fadd float undef, 0x407DCE6660000000
+ %add6082 = fadd float %val, 0x407DCE6660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6083 = insertelement <4 x float> undef, float %add6082, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6083, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6083, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp530 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3343,9 +3343,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6086 = insertelement <4 x float> undef, float %add6085, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6086, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6086, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4055C66660000000, float 0x40735199A0000000, float 0xC0713199A0000000, float 0x40729B3340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4055C66660000000, float 0x40735199A0000000, float 0xC0713199A0000000, float 0x40729B3340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp531 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3353,19 +3353,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6088 = fadd <4 x float> %tmp532, %tmp531
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6088, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6088, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp533 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6089 = extractelement <4 x float> %tmp533, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6107 = fadd float undef, 0xC06A166660000000
+ %add6107 = fadd float %val, 0xC06A166660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp534 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6108 = insertelement <4 x float> %tmp534, float %add6107, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6108, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6108, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp535 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3375,7 +3375,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp536 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp537 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3395,7 +3395,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6119 = insertelement <4 x float> %tmp540, float %add6118, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6119, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6119, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp541 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3407,7 +3407,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6122 = insertelement <4 x float> %tmp542, float %add6121, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6122, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6122, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6123 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3415,17 +3415,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp543 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6126 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp544 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6128 = insertelement <4 x float> %tmp544, float undef, i32 3
+ %vecins6128 = insertelement <4 x float> %tmp544, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6128, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6128, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float -2.980000e+02, float 0xC06F0CCCC0000000, float 0xC054A66660000000, float 0xC040CCCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float -2.980000e+02, float 0xC06F0CCCC0000000, float 0xC054A66660000000, float 0xC040CCCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp545 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3441,7 +3441,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6133 = insertelement <4 x float> undef, float %add6132, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6133, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6133, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6134 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3463,9 +3463,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp551 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6178 = insertelement <4 x float> %tmp551, float undef, i32 1
+ %vecins6178 = insertelement <4 x float> %tmp551, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6178, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6178, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp552 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3487,13 +3487,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6184 = insertelement <4 x float> %tmp555, float %add6183, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6184, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6184, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp556 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6189 = insertelement <4 x float> undef, float undef, i32 0
+ %vecins6189 = insertelement <4 x float> undef, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6189, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6189, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp557 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3505,7 +3505,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6192 = insertelement <4 x float> %tmp558, float %add6191, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6192, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6192, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp559 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3519,7 +3519,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6198 = insertelement <4 x float> %tmp561, float %add6197, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x407904CCC0000000, float 0x406A833340000000, float 4.895000e+02, float 0x40648999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x407904CCC0000000, float 0x406A833340000000, float 4.895000e+02, float 0x40648999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp562 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3527,7 +3527,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6200 = fadd <4 x float> %tmp563, %tmp562
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6200, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6200, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp564 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3535,7 +3535,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp565 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6203 = insertelement <4 x float> %tmp565, float undef, i32 0
+ %vecins6203 = insertelement <4 x float> %tmp565, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp566 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3549,9 +3549,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp568 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6209 = insertelement <4 x float> %tmp568, float undef, i32 2
+ %vecins6209 = insertelement <4 x float> %tmp568, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6209, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6209, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp569 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3559,7 +3559,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp570 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6219 = fadd float undef, 0xC0596CCCC0000000
+ %add6219 = fadd float %val, 0xC0596CCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp571 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3573,7 +3573,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6228 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6228, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6228, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6229 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3583,7 +3583,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6231 = insertelement <4 x float> %tmp573, float %add6230, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6231, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6231, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp574 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3595,7 +3595,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6234 = insertelement <4 x float> %tmp575, float %add6233, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6234, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6234, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6235 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3603,13 +3603,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6237 = insertelement <4 x float> undef, float %add6236, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6237, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6237, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp576 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6245 = insertelement <4 x float> undef, float undef, i32 0
+ %vecins6245 = insertelement <4 x float> undef, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6245, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6245, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp577 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3619,17 +3619,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp578 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6251 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins6251 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp579 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6253 = fadd float undef, 0xC0692999A0000000
+ %add6253 = fadd float %val, 0xC0692999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6254 = insertelement <4 x float> undef, float %add6253, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6254, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6254, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 4.600000e+02, float 0xC0777B3340000000, float 0x40351999A0000000, float 0xC06E433340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 4.600000e+02, float 0xC0777B3340000000, float 0x40351999A0000000, float 0xC06E433340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp580 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3637,7 +3637,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6256 = fadd <4 x float> %tmp581, %tmp580
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6256, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6256, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp582 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3649,7 +3649,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6259 = insertelement <4 x float> %tmp583, float %add6258, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6259, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6259, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp584 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3661,7 +3661,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6262 = insertelement <4 x float> %tmp585, float %add6261, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6262, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6262, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp586 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3669,9 +3669,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp587 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6265 = insertelement <4 x float> %tmp587, float undef, i32 2
+ %vecins6265 = insertelement <4 x float> %tmp587, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6265, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6265, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp588 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3683,9 +3683,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6268 = insertelement <4 x float> %tmp589, float %add6267, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6268, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6268, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float -3.130000e+02, float 0xC079733340000000, float -4.660000e+02, float 0xC064E66660000000>, <4 x float>* undef
+ store volatile <4 x float> <float -3.130000e+02, float 0xC079733340000000, float -4.660000e+02, float 0xC064E66660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp590 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3693,7 +3693,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6270 = fadd <4 x float> %tmp591, %tmp590
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6270, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6270, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp592 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3705,7 +3705,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6273 = insertelement <4 x float> %tmp593, float %add6272, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6273, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6273, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp594 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3717,7 +3717,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6276 = insertelement <4 x float> %tmp595, float %add6275, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6276, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6276, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp596 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3729,7 +3729,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6279 = insertelement <4 x float> %tmp597, float %add6278, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6279, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6279, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp598 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3739,21 +3739,21 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6282 = insertelement <4 x float> undef, float %add6281, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6282, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6282, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4067ECCCC0000000, float 0xC040CCCCC0000000, float 0xC0762E6660000000, float -4.750000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4067ECCCC0000000, float 0xC040CCCCC0000000, float 0xC0762E6660000000, float -4.750000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6284 = fadd <4 x float> undef, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6285 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6289 = fadd float undef, 0xC0738999A0000000
+ %add6289 = fadd float %val, 0xC0738999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp599 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6293 = insertelement <4 x float> %tmp599, float undef, i32 2
+ %vecins6293 = insertelement <4 x float> %tmp599, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6293, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6293, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp600 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3763,15 +3763,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6296 = insertelement <4 x float> undef, float %add6295, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6296, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6296, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40704199A0000000, float 0x40753CCCC0000000, float 0xC07E2199A0000000, float 0xC068833340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40704199A0000000, float 0x40753CCCC0000000, float 0xC07E2199A0000000, float 0xC068833340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp601 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6298 = fadd <4 x float> undef, %tmp601
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6298, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6298, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp602 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3783,7 +3783,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6301 = insertelement <4 x float> %tmp603, float %add6300, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6301, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6301, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp604 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3795,7 +3795,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6304 = insertelement <4 x float> %tmp605, float %add6303, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6304, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6304, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp606 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3805,7 +3805,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6307 = insertelement <4 x float> undef, float %add6306, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6307, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6307, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp607 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3817,9 +3817,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6310 = insertelement <4 x float> %tmp608, float %add6309, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6310, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6310, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x407A233340000000, float 0x406DA33340000000, float 3.725000e+02, float 0x40761199A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x407A233340000000, float 0x406DA33340000000, float 3.725000e+02, float 0x40761199A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp609 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3827,7 +3827,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6312 = fadd <4 x float> %tmp610, %tmp609
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6312, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6312, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp611 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3849,13 +3849,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6657 = insertelement <4 x float> %tmp614, float %add6656, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6657, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6657, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6660 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins6660 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6660, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6660, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC064E33340000000, float 0xC064833340000000, float 0xC0673CCCC0000000, float 0xC074266660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC064E33340000000, float 0xC064833340000000, float 0xC0673CCCC0000000, float 0xC074266660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp615 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3867,7 +3867,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6665 = insertelement <4 x float> %tmp616, float %add6664, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp617 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3875,15 +3875,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp618 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC07CC4CCC0000000, float 0x404EE66660000000, float 0xC0754CCCC0000000, float 0xC0744B3340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC07CC4CCC0000000, float 0x404EE66660000000, float 0xC0754CCCC0000000, float 0xC0744B3340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp619 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6676 = fadd <4 x float> %tmp619, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6676, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6676, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp620 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3901,7 +3901,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp622 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp623 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3913,7 +3913,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6685 = insertelement <4 x float> %tmp624, float %add6684, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6685, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6685, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp625 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3925,15 +3925,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6688 = insertelement <4 x float> %tmp626, float %add6687, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6688, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6688, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 7.500000e+00, float 0x4077E33340000000, float 0xC0596CCCC0000000, float 0xC07D4E6660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 7.500000e+00, float 0x4077E33340000000, float 0xC0596CCCC0000000, float 0xC07D4E6660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp627 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6690 = fadd <4 x float> undef, %tmp627
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6690, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6690, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp628 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3945,7 +3945,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6693 = insertelement <4 x float> %tmp629, float %add6692, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6693, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6693, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp630 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3957,7 +3957,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6696 = insertelement <4 x float> %tmp631, float %add6695, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6696, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6696, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp632 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3969,7 +3969,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6699 = insertelement <4 x float> %tmp633, float %add6698, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6699, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6699, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp634 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3981,17 +3981,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6702 = insertelement <4 x float> %tmp635, float %add6701, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6702, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6702, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40772CCCC0000000, float 0xC0625CCCC0000000, float 6.200000e+01, float 0xC06ADCCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40772CCCC0000000, float 0xC0625CCCC0000000, float 6.200000e+01, float 0xC06ADCCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp636 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp637 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6707 = insertelement <4 x float> undef, float undef, i32 0
+ %vecins6707 = insertelement <4 x float> undef, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6707, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6707, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp638 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3999,7 +3999,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp639 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp640 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4031,21 +4031,21 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp645 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6726 = fadd float undef, 0x4059B999A0000000
+ %add6726 = fadd float %val, 0x4059B999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp646 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6727 = insertelement <4 x float> %tmp646, float %add6726, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6727, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6727, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6728 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6729 = fadd float %vecext6728, 0xC073466660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC0309999A0000000, float -2.715000e+02, float 1.620000e+02, float 0x40674CCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC0309999A0000000, float -2.715000e+02, float 1.620000e+02, float 0x40674CCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp647 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4053,7 +4053,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6732 = fadd <4 x float> %tmp648, %tmp647
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6732, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6732, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp649 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4065,7 +4065,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6735 = insertelement <4 x float> %tmp650, float %add6734, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6735, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6735, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp651 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4077,7 +4077,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6738 = insertelement <4 x float> %tmp652, float %add6737, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6738, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6738, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp653 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4089,7 +4089,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6741 = insertelement <4 x float> %tmp654, float %add6740, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6741, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6741, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp655 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4101,7 +4101,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6744 = insertelement <4 x float> %tmp656, float %add6743, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6744, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6744, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp657 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4109,21 +4109,21 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6746 = fadd <4 x float> %tmp658, %tmp657
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6746, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6746, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp659 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6749 = insertelement <4 x float> undef, float undef, i32 0
+ %vecins6749 = insertelement <4 x float> undef, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6749, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6749, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp660 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6751 = fadd float undef, 0x4075DE6660000000
+ %add6751 = fadd float %val, 0x4075DE6660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6752 = insertelement <4 x float> undef, float %add6751, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6752, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6752, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp661 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4133,7 +4133,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6755 = insertelement <4 x float> undef, float %add6754, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6755, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6755, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp662 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4145,15 +4145,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6758 = insertelement <4 x float> %tmp663, float %add6757, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6758, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6758, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x403D1999A0000000, float 0xC05F533340000000, float 3.945000e+02, float 3.950000e+01>, <4 x float>* undef
+ store volatile <4 x float> <float 0x403D1999A0000000, float 0xC05F533340000000, float 3.945000e+02, float 3.950000e+01>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp664 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6760 = fadd <4 x float> undef, %tmp664
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6760, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6760, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp665 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4165,9 +4165,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp666 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC079BE6660000000, float 4.930000e+02, float 0x406CC33340000000, float 0xC062E999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC079BE6660000000, float 4.930000e+02, float 0x406CC33340000000, float 0xC062E999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp667 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4183,7 +4183,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6777 = insertelement <4 x float> %tmp669, float %add6776, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6777, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6777, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp670 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4195,9 +4195,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6784 = extractelement <4 x float> %tmp671, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6875 = insertelement <4 x float> undef, float undef, i32 0
+ %vecins6875 = insertelement <4 x float> undef, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6875, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6875, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp672 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4207,15 +4207,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6878 = insertelement <4 x float> undef, float %add6877, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6878, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6878, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6888 = fadd float undef, 0x4057CCCCC0000000
+ %add6888 = fadd float %val, 0x4057CCCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp673 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6889 = insertelement <4 x float> %tmp673, float %add6888, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6889, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6889, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp674 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4227,7 +4227,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6892 = insertelement <4 x float> %tmp675, float %add6891, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6892, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6892, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp676 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4239,7 +4239,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6895 = insertelement <4 x float> %tmp677, float %add6894, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6895, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6895, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp678 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4249,7 +4249,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6900 = fadd <4 x float> %tmp680, %tmp679
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6900, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6900, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp681 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4261,9 +4261,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6903 = insertelement <4 x float> %tmp682, float %add6902, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6903, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6903, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6905 = fadd float undef, 0x4031B33340000000
+ %add6905 = fadd float %val, 0x4031B33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp683 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4271,9 +4271,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp684 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6912 = insertelement <4 x float> %tmp684, float undef, i32 3
+ %vecins6912 = insertelement <4 x float> %tmp684, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 3.315000e+02, float 0xC066C999A0000000, float 0xC061F33340000000, float 0x4071166660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 3.315000e+02, float 0xC066C999A0000000, float 0xC061F33340000000, float 0x4071166660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp685 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4281,13 +4281,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6914 = fadd <4 x float> %tmp686, %tmp685
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6914, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6914, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6915 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6920 = insertelement <4 x float> undef, float undef, i32 1
+ %vecins6920 = insertelement <4 x float> undef, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6920, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6920, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext6921 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4295,11 +4295,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp687 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6926 = insertelement <4 x float> %tmp687, float undef, i32 3
+ %vecins6926 = insertelement <4 x float> %tmp687, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6926, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6926, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC03C4CCCC0000000, float 0xC07E5199A0000000, float -8.250000e+01, float 0xC043B33340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC03C4CCCC0000000, float 0xC07E5199A0000000, float -8.250000e+01, float 0xC043B33340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp688 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4307,13 +4307,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6928 = fadd <4 x float> %tmp689, %tmp688
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6928, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6928, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6930 = fadd float undef, -4.590000e+02
+ %add6930 = fadd float %val, -4.590000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6931 = insertelement <4 x float> undef, float %add6930, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6931, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6931, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp690 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4323,7 +4323,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp691 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp692 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4349,15 +4349,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp695 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6950 = fadd float undef, 0xC078F33340000000
+ %add6950 = fadd float %val, 0xC078F33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp696 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6951 = insertelement <4 x float> %tmp696, float %add6950, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6951, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6951, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp697 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4369,7 +4369,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6954 = insertelement <4 x float> %tmp698, float %add6953, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6954, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6954, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp699 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4377,7 +4377,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6956 = fadd <4 x float> %tmp700, %tmp699
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6956, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6956, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp701 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4389,7 +4389,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6959 = insertelement <4 x float> %tmp702, float %add6958, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6959, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6959, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp703 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4401,15 +4401,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6965 = insertelement <4 x float> %tmp704, float %add6964, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6965, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6965, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add6975 = fadd float undef, 0x406AF33340000000
+ %add6975 = fadd float %val, 0x406AF33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp705 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6976 = insertelement <4 x float> %tmp705, float %add6975, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6976, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6976, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp706 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4417,7 +4417,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6984 = fadd <4 x float> %tmp707, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6984, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6984, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp708 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4429,7 +4429,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins6987 = insertelement <4 x float> %tmp709, float %add6986, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6987, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6987, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp710 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4439,11 +4439,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp711 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins6996 = insertelement <4 x float> %tmp711, float undef, i32 3
+ %vecins6996 = insertelement <4 x float> %tmp711, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins6996, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins6996, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4077A4CCC0000000, float 0xC0757199A0000000, float 0xC072F4CCC0000000, float 0xC071DCCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4077A4CCC0000000, float 0xC0757199A0000000, float 0xC072F4CCC0000000, float 0xC071DCCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp712 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4451,7 +4451,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add6998 = fadd <4 x float> %tmp713, %tmp712
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add6998, <4 x float>* undef, align 16
+ store volatile <4 x float> %add6998, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp714 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4463,7 +4463,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7001 = insertelement <4 x float> %tmp715, float %add7000, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7001, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7001, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp716 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4475,11 +4475,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7004 = insertelement <4 x float> %tmp717, float %add7003, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp718 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add7140 = fadd float undef, 0x403D333340000000
+ %add7140 = fadd float %val, 0x403D333340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7141 = insertelement <4 x float> undef, float %add7140, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4489,7 +4489,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7144 = insertelement <4 x float> undef, float %add7143, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp719 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4501,15 +4501,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7150 = insertelement <4 x float> %tmp720, float %add7149, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7150, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7150, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 1.700000e+02, float 0xC077B4CCC0000000, float 0x40625999A0000000, float 0x406C166660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 1.700000e+02, float 0xC077B4CCC0000000, float 0x40625999A0000000, float 0x406C166660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp721 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add7152 = fadd <4 x float> %tmp721, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add7152, <4 x float>* undef, align 16
+ store volatile <4 x float> %add7152, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext7156 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4519,7 +4519,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7158 = insertelement <4 x float> %tmp722, float %add7157, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7158, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7158, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp723 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4531,13 +4531,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7161 = insertelement <4 x float> %tmp724, float %add7160, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7161, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7161, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add7168 = fadd float undef, 0xC072F199A0000000
+ %add7168 = fadd float %val, 0xC072F199A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp725 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext7170 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4545,11 +4545,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7172 = insertelement <4 x float> undef, float %add7171, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7172, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7172, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext7173 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp726 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4559,7 +4559,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7421 = insertelement <4 x float> undef, float %add7420, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7421, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7421, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp727 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4571,7 +4571,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7424 = insertelement <4 x float> %tmp728, float %add7423, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7424, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7424, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp729 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4583,11 +4583,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7427 = insertelement <4 x float> %tmp730, float %add7426, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7427, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7427, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext7428 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp731 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4599,9 +4599,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7570 = insertelement <4 x float> %tmp732, float %add7569, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7570, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7570, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40745199A0000000, float 0xC0411999A0000000, float -5.650000e+01, float -4.005000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40745199A0000000, float 0xC0411999A0000000, float -5.650000e+01, float -4.005000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp733 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4609,7 +4609,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add7572 = fadd <4 x float> %tmp734, %tmp733
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add7572, <4 x float>* undef, align 16
+ store volatile <4 x float> %add7572, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext7573 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4619,11 +4619,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7575 = insertelement <4 x float> %tmp735, float %add7574, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7575, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7575, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp736 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add7577 = fadd float undef, 0xC051666660000000
+ %add7577 = fadd float %val, 0xC051666660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp737 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4635,7 +4635,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7581 = insertelement <4 x float> undef, float %add7580, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7581, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7581, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp739 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4647,7 +4647,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7584 = insertelement <4 x float> %tmp740, float %add7583, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC057533340000000, float 0x4060A33340000000, float 0x40791E6660000000, float 2.455000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC057533340000000, float 0x4060A33340000000, float 0x40791E6660000000, float 2.455000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp741 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4655,7 +4655,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add7586 = fadd <4 x float> %tmp742, %tmp741
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add7586, <4 x float>* undef, align 16
+ store volatile <4 x float> %add7586, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp743 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4665,7 +4665,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp744 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp745 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4677,15 +4677,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7592 = insertelement <4 x float> %tmp746, float %add7591, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7592, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7592, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp747 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext7593 = extractelement <4 x float> %tmp747, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins7595 = insertelement <4 x float> undef, float undef, i32 2
+ %vecins7595 = insertelement <4 x float> undef, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7595, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7595, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp748 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4693,17 +4693,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add7597 = fadd float %vecext7596, 0x407E666660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x406A766660000000, float 0xBFC99999A0000000, float 0xC0751B3340000000, float -4.075000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0x406A766660000000, float 0xBFC99999A0000000, float 0xC0751B3340000000, float -4.075000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp749 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add7616 = fadd float undef, 0xC04DE66660000000
+ %add7616 = fadd float %val, 0xC04DE66660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp750 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7617 = insertelement <4 x float> %tmp750, float %add7616, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7617, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7617, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp751 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4715,17 +4715,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7620 = insertelement <4 x float> %tmp752, float %add7619, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7620, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7620, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp753 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add7622 = fadd float undef, 0xC054B999A0000000
+ %add7622 = fadd float %val, 0xC054B999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp754 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins7626 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins7626 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7626, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7626, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp755 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4733,7 +4733,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add7628 = fadd <4 x float> %tmp756, %tmp755
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add7628, <4 x float>* undef, align 16
+ store volatile <4 x float> %add7628, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp757 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4745,13 +4745,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7631 = insertelement <4 x float> %tmp758, float %add7630, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add7639 = fadd float undef, 0x407C5999A0000000
+ %add7639 = fadd float %val, 0x407C5999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp759 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7640 = insertelement <4 x float> %tmp759, float %add7639, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x406AA66660000000, float 0x4067C66660000000, float 0xC054866660000000, float -2.400000e+01>, <4 x float>* undef
+ store volatile <4 x float> <float 0x406AA66660000000, float 0x4067C66660000000, float 0xC054866660000000, float -2.400000e+01>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp760 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4759,9 +4759,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp761 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add7644 = fadd float undef, 0xC0758999A0000000
+ %add7644 = fadd float %val, 0xC0758999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp762 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4773,7 +4773,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7648 = insertelement <4 x float> %tmp763, float %add7647, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7648, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7648, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp764 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4785,7 +4785,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7651 = insertelement <4 x float> %tmp765, float %add7650, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7651, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7651, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp766 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4797,7 +4797,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7654 = insertelement <4 x float> %tmp767, float %add7653, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7654, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7654, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp768 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4805,7 +4805,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add7656 = fadd <4 x float> %tmp769, %tmp768
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add7656, <4 x float>* undef, align 16
+ store volatile <4 x float> %add7656, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp770 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4817,7 +4817,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7659 = insertelement <4 x float> %tmp771, float %add7658, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7659, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7659, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp772 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4829,7 +4829,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7662 = insertelement <4 x float> %tmp773, float %add7661, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7662, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7662, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp774 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4841,7 +4841,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7665 = insertelement <4 x float> %tmp775, float %add7664, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7665, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7665, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp776 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4851,7 +4851,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7668 = insertelement <4 x float> undef, float %add7667, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7668, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7668, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp777 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4873,23 +4873,23 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp781 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp782 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add7731 = fadd float undef, 1.900000e+02
+ %add7731 = fadd float %val, 1.900000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp783 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins7732 = insertelement <4 x float> %tmp783, float %add7731, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7732, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7732, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp784 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins7735 = insertelement <4 x float> %tmp784, float undef, i32 2
+ %vecins7735 = insertelement <4 x float> %tmp784, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7735, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7735, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp785 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4897,11 +4897,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add7737 = fadd float %vecext7736, 0xC06AF66660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins7850 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins7850 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins7850, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins7850, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4062A33340000000, float 2.290000e+02, float 0x40509999A0000000, float 0xC078BE6660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4062A33340000000, float 2.290000e+02, float 0x40509999A0000000, float 0xC078BE6660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp786 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4909,7 +4909,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add7852 = fadd <4 x float> %tmp787, %tmp786
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add7852, <4 x float>* undef, align 16
+ store volatile <4 x float> %add7852, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp788 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4921,13 +4921,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9398 = insertelement <4 x float> %tmp789, float %add9397, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9398, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9398, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext9399 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp790 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9401 = insertelement <4 x float> %tmp790, float undef, i32 2
+ %vecins9401 = insertelement <4 x float> %tmp790, float %val, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp791 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4939,11 +4939,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9404 = insertelement <4 x float> %tmp792, float %add9403, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9404, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9404, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp793 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp794 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4959,7 +4959,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp796 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp797 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4971,7 +4971,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9415 = insertelement <4 x float> %tmp798, float %add9414, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9415, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9415, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp799 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4983,9 +4983,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9418 = insertelement <4 x float> %tmp800, float %add9417, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9418, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9418, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 3.555000e+02, float 0xC062E33340000000, float 0x4065C66660000000, float -3.645000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 3.555000e+02, float 0xC062E33340000000, float 0x4065C66660000000, float -3.645000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp801 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4993,7 +4993,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9420 = fadd <4 x float> %tmp802, %tmp801
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add9420, <4 x float>* undef, align 16
+ store volatile <4 x float> %add9420, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp803 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5001,9 +5001,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp804 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9423 = insertelement <4 x float> %tmp804, float undef, i32 0
+ %vecins9423 = insertelement <4 x float> %tmp804, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9423, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9423, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp805 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5015,17 +5015,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9426 = insertelement <4 x float> %tmp806, float %add9425, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9426, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9426, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp807 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9428 = fadd float undef, 0xC065466660000000
+ %add9428 = fadd float %val, 0xC065466660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp808 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9429 = insertelement <4 x float> %tmp808, float %add9428, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9429, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9429, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp809 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5037,7 +5037,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9432 = insertelement <4 x float> %tmp810, float %add9431, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC07C7E6660000000, float 1.205000e+02, float 0x4050D999A0000000, float 0xC06B233340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC07C7E6660000000, float 1.205000e+02, float 0x4050D999A0000000, float 0xC06B233340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp811 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5045,7 +5045,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9434 = fadd <4 x float> %tmp812, %tmp811
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9436 = fadd float undef, -3.185000e+02
+ %add9436 = fadd float %val, -3.185000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp813 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5053,7 +5053,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp814 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp815 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5065,7 +5065,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9443 = insertelement <4 x float> %tmp816, float %add9442, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9443, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9443, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp817 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5077,7 +5077,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9446 = insertelement <4 x float> %tmp818, float %add9445, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9446, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9446, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp819 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5085,23 +5085,23 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9448 = fadd <4 x float> %tmp820, %tmp819
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add9448, <4 x float>* undef, align 16
+ store volatile <4 x float> %add9448, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9450 = fadd float undef, 0xC0718199A0000000
+ %add9450 = fadd float %val, 0xC0718199A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp821 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9451 = insertelement <4 x float> %tmp821, float %add9450, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9451, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9451, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp822 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp823 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9454 = insertelement <4 x float> %tmp823, float undef, i32 1
+ %vecins9454 = insertelement <4 x float> %tmp823, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9454, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9454, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp824 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5113,23 +5113,23 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9457 = insertelement <4 x float> %tmp825, float %add9456, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9457, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9457, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext9458 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp826 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9460 = insertelement <4 x float> %tmp826, float undef, i32 3
+ %vecins9460 = insertelement <4 x float> %tmp826, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9460, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9460, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x407B5E6660000000, float 0x40648999A0000000, float 0xC06B966660000000, float 0x40341999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x407B5E6660000000, float 0x40648999A0000000, float 0xC06B966660000000, float 0x40341999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp827 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9462 = fadd <4 x float> %tmp827, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add9462, <4 x float>* undef, align 16
+ store volatile <4 x float> %add9462, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp828 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5137,23 +5137,23 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp829 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9465 = insertelement <4 x float> %tmp829, float undef, i32 0
+ %vecins9465 = insertelement <4 x float> %tmp829, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9467 = fadd float undef, 0x405D666660000000
+ %add9467 = fadd float %val, 0x405D666660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp830 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9468 = insertelement <4 x float> %tmp830, float %add9467, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9468, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9468, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp831 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9470 = fadd float undef, 0x4077033340000000
+ %add9470 = fadd float %val, 0x4077033340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp832 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext9472 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5163,9 +5163,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9474 = insertelement <4 x float> %tmp833, float %add9473, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9474, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9474, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x404F733340000000, float 0x407AB4CCC0000000, float 0x40605999A0000000, float 0xC03E4CCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x404F733340000000, float 0x407AB4CCC0000000, float 0x40605999A0000000, float 0xC03E4CCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp834 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5173,7 +5173,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9476 = fadd <4 x float> %tmp835, %tmp834
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add9476, <4 x float>* undef, align 16
+ store volatile <4 x float> %add9476, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp836 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5185,17 +5185,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9479 = insertelement <4 x float> %tmp837, float %add9478, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9479, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9479, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp838 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9481 = fadd float undef, 0x407BE33340000000
+ %add9481 = fadd float %val, 0x407BE33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp839 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9482 = insertelement <4 x float> %tmp839, float %add9481, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9482, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9482, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext9483 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5205,7 +5205,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9485 = insertelement <4 x float> %tmp840, float %add9484, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9485, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9485, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp841 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5215,13 +5215,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp842 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC076B999A0000000, float 0xC0706CCCC0000000, float 0x407904CCC0000000, float 0x407EE199A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC076B999A0000000, float 0xC0706CCCC0000000, float 0x407904CCC0000000, float 0x407EE199A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp843 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp844 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5229,15 +5229,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9492 = fadd float %vecext9491, 0x407C166660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9495 = fadd float undef, 0x407DBB3340000000
+ %add9495 = fadd float %val, 0x407DBB3340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp845 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9496 = insertelement <4 x float> %tmp845, float %add9495, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9496, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9496, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp846 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5249,41 +5249,41 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9499 = insertelement <4 x float> %tmp847, float %add9498, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9499, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9499, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp848 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9501 = fadd float undef, 0x407D5CCCC0000000
+ %add9501 = fadd float %val, 0x407D5CCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp849 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9502 = insertelement <4 x float> %tmp849, float %add9501, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9502, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9502, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp850 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9504 = fadd <4 x float> %tmp850, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add9504, <4 x float>* undef, align 16
+ store volatile <4 x float> %add9504, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp851 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9506 = fadd float undef, 0x4076EE6660000000
+ %add9506 = fadd float %val, 0x4076EE6660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp852 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9507 = insertelement <4 x float> %tmp852, float %add9506, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9507, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9507, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp853 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9509 = fadd float undef, 0xC0535999A0000000
+ %add9509 = fadd float %val, 0xC0535999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp854 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp855 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5295,7 +5295,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9513 = insertelement <4 x float> %tmp856, float %add9512, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9513, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9513, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp857 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5303,11 +5303,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp858 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9516 = insertelement <4 x float> %tmp858, float undef, i32 3
+ %vecins9516 = insertelement <4 x float> %tmp858, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9516, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9516, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x407254CCC0000000, float 0x407844CCC0000000, float 0xC04D9999A0000000, float 0xC0550CCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x407254CCC0000000, float 0x407844CCC0000000, float 0xC04D9999A0000000, float 0xC0550CCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp859 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5319,9 +5319,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp862 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9521 = insertelement <4 x float> %tmp862, float undef, i32 0
+ %vecins9521 = insertelement <4 x float> %tmp862, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9521, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9521, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp863 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5333,25 +5333,25 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9524 = insertelement <4 x float> %tmp864, float %add9523, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9524, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9524, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp865 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9526 = fadd float undef, 0x4072833340000000
+ %add9526 = fadd float %val, 0x4072833340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp866 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9527 = insertelement <4 x float> %tmp866, float %add9526, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9527, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9527, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp867 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9530 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins9530 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9530, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9530, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4072F4CCC0000000, float 0x4065CCCCC0000000, float 0x4051D33340000000, float 0x40680CCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4072F4CCC0000000, float 0x4065CCCCC0000000, float 0x4051D33340000000, float 0x40680CCCC0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp868 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5363,9 +5363,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp870 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9535 = insertelement <4 x float> %tmp870, float undef, i32 0
+ %vecins9535 = insertelement <4 x float> %tmp870, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9535, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9535, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp871 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5377,7 +5377,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9538 = insertelement <4 x float> %tmp872, float %add9537, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9538, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9538, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp873 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5385,17 +5385,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9543 = fadd float %vecext9542, 0x4050D999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9576 = fadd float undef, 0x40219999A0000000
+ %add9576 = fadd float %val, 0x40219999A0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9577 = insertelement <4 x float> undef, float %add9576, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9577, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9577, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp874 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9580 = insertelement <4 x float> undef, float undef, i32 1
+ %vecins9580 = insertelement <4 x float> undef, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9580, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9580, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp875 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5407,11 +5407,11 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9583 = insertelement <4 x float> %tmp876, float %add9582, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9583, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9583, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp877 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext9673 = extractelement <4 x float> undef, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5421,7 +5421,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9675 = insertelement <4 x float> %tmp878, float %add9674, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9675, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9675, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext9676 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5441,7 +5441,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9681 = insertelement <4 x float> %tmp881, float %add9680, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9681, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9681, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp882 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5451,7 +5451,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9686 = fadd <4 x float> %tmp883, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add9686, <4 x float>* undef, align 16
+ store volatile <4 x float> %add9686, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp884 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5481,19 +5481,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9695 = insertelement <4 x float> %tmp888, float %add9694, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9695, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9695, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp889 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9697 = fadd float undef, 0x4058D33340000000
+ %add9697 = fadd float %val, 0x4058D33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp890 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9698 = insertelement <4 x float> %tmp890, float %add9697, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9698, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9698, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4062CCCCC0000000, float 0x407AD999A0000000, float 0x40582CCCC0000000, float 0xC0712B3340000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4062CCCCC0000000, float 0x407AD999A0000000, float 0x40582CCCC0000000, float 0xC0712B3340000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp891 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5509,7 +5509,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9703 = insertelement <4 x float> %tmp893, float %add9702, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9703, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9703, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp894 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5521,7 +5521,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9706 = insertelement <4 x float> %tmp895, float %add9705, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9706, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9706, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext9707 = extractelement <4 x float> undef, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5531,23 +5531,23 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9709 = insertelement <4 x float> %tmp896, float %add9708, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9709, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9709, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp897 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext9710 = extractelement <4 x float> %tmp897, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9712 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins9712 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9712, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9712, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4069F33340000000, float 0xC048266660000000, float 0x40638CCCC0000000, float 0xC07EC199A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4069F33340000000, float 0xC048266660000000, float 0x40638CCCC0000000, float 0xC07EC199A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp898 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9714 = fadd <4 x float> undef, %tmp898
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add9714, <4 x float>* undef, align 16
+ store volatile <4 x float> %add9714, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp899 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5555,9 +5555,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp900 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9717 = insertelement <4 x float> %tmp900, float undef, i32 0
+ %vecins9717 = insertelement <4 x float> %tmp900, float %val, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9717, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9717, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp901 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5569,7 +5569,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9720 = insertelement <4 x float> %tmp902, float %add9719, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9720, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9720, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp903 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5581,7 +5581,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9723 = insertelement <4 x float> %tmp904, float %add9722, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9723, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9723, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp905 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5593,15 +5593,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9726 = insertelement <4 x float> %tmp906, float %add9725, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9726, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9726, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float -4.575000e+02, float 0x40713E6660000000, float 0x407D133340000000, float -1.425000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float -4.575000e+02, float 0x40713E6660000000, float 0x407D133340000000, float -1.425000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp907 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9728 = fadd <4 x float> %tmp907, undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add9728, <4 x float>* undef, align 16
+ store volatile <4 x float> %add9728, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp908 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5613,17 +5613,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9731 = insertelement <4 x float> %tmp909, float %add9730, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9731, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9731, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp910 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9733 = fadd float undef, 0xC050F33340000000
+ %add9733 = fadd float %val, 0xC050F33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp911 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9734 = insertelement <4 x float> %tmp911, float %add9733, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9734, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9734, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp912 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5635,23 +5635,23 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9737 = insertelement <4 x float> %tmp913, float %add9736, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9737, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9737, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp914 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext9738 = extractelement <4 x float> %tmp914, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9740 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins9740 = insertelement <4 x float> undef, float %val, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9740, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9740, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 2.150000e+02, float 0x405A2CCCC0000000, float 2.310000e+02, float 0x404E1999A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 2.150000e+02, float 0x405A2CCCC0000000, float 2.310000e+02, float 0x404E1999A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp915 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp916 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp917 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5661,7 +5661,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9745 = insertelement <4 x float> undef, float %add9744, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9745, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9745, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp918 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5673,7 +5673,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9748 = insertelement <4 x float> %tmp919, float %add9747, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9748, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9748, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp920 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5685,7 +5685,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9751 = insertelement <4 x float> %tmp921, float %add9750, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9751, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9751, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp922 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5697,9 +5697,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9754 = insertelement <4 x float> %tmp923, float %add9753, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9754, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9754, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 2.590000e+02, float 0x407B7199A0000000, float 0xC07ED199A0000000, float 0xC064FCCCC0000000>, <4 x float>* %.compoundliteral9755
+ store volatile <4 x float> <float 2.590000e+02, float 0x407B7199A0000000, float 0xC07ED199A0000000, float 0xC064FCCCC0000000>, <4 x float>* %.compoundliteral9755
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp924 = load <4 x float>, <4 x float>* %.compoundliteral9755
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5717,7 +5717,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9759 = insertelement <4 x float> %tmp927, float %add9758, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9759, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9759, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp928 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5729,17 +5729,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9762 = insertelement <4 x float> %tmp929, float %add9761, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9762, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9762, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp930 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add9764 = fadd float undef, 0xC060E66660000000
+ %add9764 = fadd float %val, 0xC060E66660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp931 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9765 = insertelement <4 x float> %tmp931, float %add9764, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9765, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9765, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp932 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5751,9 +5751,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9768 = insertelement <4 x float> %tmp933, float %add9767, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9768, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9768, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4032CCCCC0000000, float -9.600000e+01, float -5.000000e+02, float 0x4078EE6660000000>, <4 x float>* %.compoundliteral9769
+ store volatile <4 x float> <float 0x4032CCCCC0000000, float -9.600000e+01, float -5.000000e+02, float 0x4078EE6660000000>, <4 x float>* %.compoundliteral9769
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp934 = load <4 x float>, <4 x float>* %.compoundliteral9769
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5761,7 +5761,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add9770 = fadd <4 x float> %tmp935, %tmp934
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add9770, <4 x float>* undef, align 16
+ store volatile <4 x float> %add9770, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp936 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5773,7 +5773,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9773 = insertelement <4 x float> %tmp937, float %add9772, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9773, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9773, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp938 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5785,25 +5785,25 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins9776 = insertelement <4 x float> %tmp939, float %add9775, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins9776, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins9776, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext9816 = extractelement <4 x float> undef, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp940 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %vecins9818 = insertelement <4 x float> %tmp940, float undef, i32 1
+ %vecins9818 = insertelement <4 x float> %tmp940, float %val, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp941 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add10388 = fadd float undef, 4.755000e+02
+ %add10388 = fadd float %val, 4.755000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp942 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10389 = insertelement <4 x float> %tmp942, float %add10388, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10389, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10389, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp943 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5815,19 +5815,19 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10392 = insertelement <4 x float> %tmp944, float %add10391, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10392, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10392, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp945 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp946 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add10405 = fadd float undef, -5.650000e+01
+ %add10405 = fadd float %val, -5.650000e+01
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp947 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10406 = insertelement <4 x float> %tmp947, float %add10405, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10406, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10406, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp948 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5839,7 +5839,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10409 = insertelement <4 x float> %tmp949, float %add10408, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10409, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10409, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp950 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5849,9 +5849,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp951 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float -2.340000e+02, float -4.720000e+02, float 4.350000e+02, float 0xC059A66660000000>, <4 x float>* %.compoundliteral10413
+ store volatile <4 x float> <float -2.340000e+02, float -4.720000e+02, float 4.350000e+02, float 0xC059A66660000000>, <4 x float>* %.compoundliteral10413
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp952 = load <4 x float>, <4 x float>* %.compoundliteral10413
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5859,7 +5859,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add10414 = fadd <4 x float> %tmp953, %tmp952
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add10414, <4 x float>* undef, align 16
+ store volatile <4 x float> %add10414, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp954 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5871,7 +5871,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10417 = insertelement <4 x float> %tmp955, float %add10416, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10417, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10417, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp956 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5883,15 +5883,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10420 = insertelement <4 x float> %tmp957, float %add10419, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10420, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10420, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add10422 = fadd float undef, 0xC0662CCCC0000000
+ %add10422 = fadd float %val, 0xC0662CCCC0000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext10424 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x402B333340000000, float 0x40735E6660000000, float 0xC0567999A0000000, float 2.050000e+02>, <4 x float>* undef
+ store volatile <4 x float> <float 0x402B333340000000, float 0x40735E6660000000, float 0xC0567999A0000000, float 2.050000e+02>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp958 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5899,7 +5899,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add10428 = fadd <4 x float> %tmp959, %tmp958
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add10428, <4 x float>* undef, align 16
+ store volatile <4 x float> %add10428, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp960 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5909,13 +5909,13 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp961 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add10436 = fadd float undef, 0xC06AF33340000000
+ %add10436 = fadd float %val, 0xC06AF33340000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp962 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10437 = insertelement <4 x float> %tmp962, float %add10436, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10437, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10437, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecext10438 = extractelement <4 x float> undef, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5925,9 +5925,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10440 = insertelement <4 x float> %tmp963, float %add10439, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10440, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10440, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC065E999A0000000, float 0x4067D33340000000, float 0xC070133340000000, float 0x406B666660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0xC065E999A0000000, float 0x4067D33340000000, float 0xC070133340000000, float 0x406B666660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp964 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5941,7 +5941,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10445 = insertelement <4 x float> %tmp966, float %add10444, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10445, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10445, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp967 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5953,7 +5953,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10448 = insertelement <4 x float> %tmp968, float %add10447, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10448, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10448, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp969 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5965,7 +5965,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10451 = insertelement <4 x float> %tmp970, float %add10450, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10451, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10451, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp971 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5975,7 +5975,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10454 = insertelement <4 x float> undef, float %add10453, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x406AFCCCC0000000, float 0xC07604CCC0000000, float 6.900000e+01, float 0xC060A66660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x406AFCCCC0000000, float 0xC07604CCC0000000, float 6.900000e+01, float 0xC060A66660000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp972 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5983,7 +5983,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%add10456 = fadd <4 x float> %tmp973, %tmp972
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %add10456, <4 x float>* undef, align 16
+ store volatile <4 x float> %add10456, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp974 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5993,7 +5993,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10459 = insertelement <4 x float> undef, float %add10458, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10459, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10459, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp975 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6015,7 +6015,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10465 = insertelement <4 x float> %tmp978, float %add10464, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10465, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10465, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp979 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6027,9 +6027,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10468 = insertelement <4 x float> %tmp980, float %add10467, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10468, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10468, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x4078833340000000, float 0x40786CCCC0000000, float 0xC0468CCCC0000000, float 0xC0793199A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x4078833340000000, float 0x40786CCCC0000000, float 0xC0468CCCC0000000, float 0xC0793199A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp981 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6045,7 +6045,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10473 = insertelement <4 x float> %tmp983, float %add10472, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10473, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10473, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp984 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6057,15 +6057,15 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10476 = insertelement <4 x float> %tmp985, float %add10475, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10476, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10476, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add10489 = fadd float undef, 0x4074666660000000
+ %add10489 = fadd float %val, 0x4074666660000000
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp986 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10490 = insertelement <4 x float> %tmp986, float %add10489, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10490, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10490, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp987 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6079,9 +6079,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10510 = insertelement <4 x float> %tmp989, float %add10509, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10510, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10510, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0x40656999A0000000, float 0xC073766660000000, float 1.685000e+02, float 0x40765199A0000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x40656999A0000000, float 0xC073766660000000, float 1.685000e+02, float 0x40765199A0000000>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp990 = load <4 x float>, <4 x float>* undef
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6097,17 +6097,17 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10515 = insertelement <4 x float> %tmp992, float %add10514, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10515, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10515, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp993 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- %add10562 = fadd float undef, 2.035000e+02
+ %add10562 = fadd float %val, 2.035000e+02
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp994 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10563 = insertelement <4 x float> %tmp994, float %add10562, i32 2
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10563, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10563, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp995 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6119,9 +6119,9 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10566 = insertelement <4 x float> %tmp996, float %add10565, i32 3
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10566, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10566, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> <float 0xC068B999A0000000, float 0xC050E66660000000, float 0xC0725999A0000000, float 0xC054D33340000000>, <4 x float>* %.compoundliteral10567
+ store volatile <4 x float> <float 0xC068B999A0000000, float 0xC050E66660000000, float 0xC0725999A0000000, float 0xC054D33340000000>, <4 x float>* %.compoundliteral10567
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp997 = load <4 x float>, <4 x float>* %.compoundliteral10567
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6139,7 +6139,7 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10571 = insertelement <4 x float> %tmp1000, float %add10570, i32 0
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10571, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10571, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%tmp1001 = load <4 x float>, <4 x float>* undef, align 16
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6151,56 +6151,56 @@ entry:
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
%vecins10574 = insertelement <4 x float> %tmp1002, float %add10573, i32 1
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
- store <4 x float> %vecins10574, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10574, <4 x float>* undef, align 16
%tmp1003 = load <4 x float>, <4 x float>* undef, align 16
%vecext10575 = extractelement <4 x float> %tmp1003, i32 2
%tmp1004 = load <4 x float>, <4 x float>* undef, align 16
- %vecins10577 = insertelement <4 x float> %tmp1004, float undef, i32 2
- store <4 x float> %vecins10577, <4 x float>* undef, align 16
+ %vecins10577 = insertelement <4 x float> %tmp1004, float %val, i32 2
+ store volatile <4 x float> %vecins10577, <4 x float>* undef, align 16
%tmp1005 = load <4 x float>, <4 x float>* undef, align 16
%vecext10578 = extractelement <4 x float> %tmp1005, i32 3
%add10579 = fadd float %vecext10578, 0x4076566660000000
%tmp1006 = load <4 x float>, <4 x float>* undef, align 16
%vecins10580 = insertelement <4 x float> %tmp1006, float %add10579, i32 3
- store <4 x float> %vecins10580, <4 x float>* undef, align 16
- store <4 x float> <float 0x407CAB3340000000, float 1.685000e+02, float 0xC07B866660000000, float 0xC061ACCCC0000000>, <4 x float>* %.compoundliteral10581
+ store volatile <4 x float> %vecins10580, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x407CAB3340000000, float 1.685000e+02, float 0xC07B866660000000, float 0xC061ACCCC0000000>, <4 x float>* %.compoundliteral10581
%tmp1007 = load <4 x float>, <4 x float>* %.compoundliteral10581
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1008 = load <4 x float>, <4 x float>* undef, align 16
%vecext10583 = extractelement <4 x float> %tmp1008, i32 0
%add10584 = fadd float %vecext10583, 0xC060533340000000
%tmp1009 = load <4 x float>, <4 x float>* undef, align 16
%vecins10585 = insertelement <4 x float> %tmp1009, float %add10584, i32 0
- store <4 x float> %vecins10585, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10585, <4 x float>* undef, align 16
%tmp1010 = load <4 x float>, <4 x float>* undef, align 16
%vecext10586 = extractelement <4 x float> %tmp1010, i32 1
%add10587 = fadd float %vecext10586, 0xC0694CCCC0000000
%tmp1011 = load <4 x float>, <4 x float>* undef, align 16
%vecins10588 = insertelement <4 x float> %tmp1011, float %add10587, i32 1
- store <4 x float> %vecins10588, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10588, <4 x float>* undef, align 16
%tmp1012 = load <4 x float>, <4 x float>* undef, align 16
%vecext10589 = extractelement <4 x float> %tmp1012, i32 2
%add10590 = fadd float %vecext10589, 0xC0541999A0000000
%tmp1013 = load <4 x float>, <4 x float>* undef, align 16
%vecins10591 = insertelement <4 x float> %tmp1013, float %add10590, i32 2
- store <4 x float> %vecins10591, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10591, <4 x float>* undef, align 16
%tmp1014 = load <4 x float>, <4 x float>* undef, align 16
%vecext10592 = extractelement <4 x float> %tmp1014, i32 3
%add10593 = fadd float %vecext10592, 0xC06C566660000000
%tmp1015 = load <4 x float>, <4 x float>* undef, align 16
%vecins10594 = insertelement <4 x float> %tmp1015, float %add10593, i32 3
- store <4 x float> %vecins10594, <4 x float>* undef, align 16
- store <4 x float> <float 0x407A3199A0000000, float 0xC0659999A0000000, float 0x407E0999A0000000, float 0xC0334CCCC0000000>, <4 x float>* %.compoundliteral10595
+ store volatile <4 x float> %vecins10594, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x407A3199A0000000, float 0xC0659999A0000000, float 0x407E0999A0000000, float 0xC0334CCCC0000000>, <4 x float>* %.compoundliteral10595
%tmp1016 = load <4 x float>, <4 x float>* %.compoundliteral10595
%tmp1017 = load <4 x float>, <4 x float>* undef, align 16
%add10596 = fadd <4 x float> %tmp1017, %tmp1016
- store <4 x float> %add10596, <4 x float>* undef, align 16
+ store volatile <4 x float> %add10596, <4 x float>* undef, align 16
%tmp1018 = load <4 x float>, <4 x float>* undef, align 16
%vecext10597 = extractelement <4 x float> %tmp1018, i32 0
%add10598 = fadd float %vecext10597, 0x40640999A0000000
%tmp1019 = load <4 x float>, <4 x float>* undef, align 16
%vecins10599 = insertelement <4 x float> %tmp1019, float %add10598, i32 0
- store <4 x float> %vecins10599, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10599, <4 x float>* undef, align 16
%tmp1020 = load <4 x float>, <4 x float>* undef, align 16
%vecext10600 = extractelement <4 x float> %tmp1020, i32 1
%add10601 = fadd float %vecext10600, 0xC073966660000000
@@ -6211,48 +6211,48 @@ entry:
%add10604 = fadd float %vecext10603, 1.780000e+02
%tmp1023 = load <4 x float>, <4 x float>* undef, align 16
%vecins10605 = insertelement <4 x float> %tmp1023, float %add10604, i32 2
- store <4 x float> %vecins10605, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10605, <4 x float>* undef, align 16
%tmp1024 = load <4 x float>, <4 x float>* undef, align 16
- %add10607 = fadd float undef, 0x4070A33340000000
+ %add10607 = fadd float %val, 0x4070A33340000000
%tmp1025 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> <float 0x407C5999A0000000, float 0x4046733340000000, float 0xC06E6CCCC0000000, float 0xC063C33340000000>, <4 x float>* %.compoundliteral10609
+ store volatile <4 x float> <float 0x407C5999A0000000, float 0x4046733340000000, float 0xC06E6CCCC0000000, float 0xC063C33340000000>, <4 x float>* %.compoundliteral10609
%tmp1026 = load <4 x float>, <4 x float>* %.compoundliteral10609
%tmp1027 = load <4 x float>, <4 x float>* undef, align 16
%tmp1028 = load <4 x float>, <4 x float>* undef, align 16
%vecext10611 = extractelement <4 x float> %tmp1028, i32 0
%add10612 = fadd float %vecext10611, 0x40757199A0000000
%vecins10613 = insertelement <4 x float> undef, float %add10612, i32 0
- store <4 x float> %vecins10613, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10613, <4 x float>* undef, align 16
%tmp1029 = load <4 x float>, <4 x float>* undef, align 16
%vecext10614 = extractelement <4 x float> %tmp1029, i32 1
%add10615 = fadd float %vecext10614, 0x40740CCCC0000000
%tmp1030 = load <4 x float>, <4 x float>* undef, align 16
%vecins10616 = insertelement <4 x float> %tmp1030, float %add10615, i32 1
- store <4 x float> %vecins10616, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10616, <4 x float>* undef, align 16
%tmp1031 = load <4 x float>, <4 x float>* undef, align 16
%vecext10617 = extractelement <4 x float> %tmp1031, i32 2
%add10618 = fadd float %vecext10617, 0xC012CCCCC0000000
%tmp1032 = load <4 x float>, <4 x float>* undef, align 16
%vecins10619 = insertelement <4 x float> %tmp1032, float %add10618, i32 2
- store <4 x float> %vecins10619, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10619, <4 x float>* undef, align 16
%tmp1033 = load <4 x float>, <4 x float>* undef, align 16
%vecext10620 = extractelement <4 x float> %tmp1033, i32 3
%add10621 = fadd float %vecext10620, 0x406E566660000000
%tmp1034 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> <float 0x407B2199A0000000, float 0xC07D9CCCC0000000, float -4.350000e+01, float 0xC07D3B3340000000>, <4 x float>* %.compoundliteral10623
+ store volatile <4 x float> <float 0x407B2199A0000000, float 0xC07D9CCCC0000000, float -4.350000e+01, float 0xC07D3B3340000000>, <4 x float>* %.compoundliteral10623
%tmp1035 = load <4 x float>, <4 x float>* %.compoundliteral10623
%add10624 = fadd <4 x float> undef, %tmp1035
%tmp1036 = load <4 x float>, <4 x float>* undef, align 16
%vecext10625 = extractelement <4 x float> %tmp1036, i32 0
%tmp1037 = load <4 x float>, <4 x float>* undef, align 16
- %vecins10627 = insertelement <4 x float> %tmp1037, float undef, i32 0
- store <4 x float> %vecins10627, <4 x float>* undef, align 16
+ %vecins10627 = insertelement <4 x float> %tmp1037, float %val, i32 0
+ store volatile <4 x float> %vecins10627, <4 x float>* undef, align 16
%tmp1038 = load <4 x float>, <4 x float>* undef, align 16
%vecext10628 = extractelement <4 x float> %tmp1038, i32 1
%add10629 = fadd float %vecext10628, 0x407E3CCCC0000000
%tmp1039 = load <4 x float>, <4 x float>* undef, align 16
%vecins10630 = insertelement <4 x float> %tmp1039, float %add10629, i32 1
- store <4 x float> %vecins10630, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10630, <4 x float>* undef, align 16
%tmp1040 = load <4 x float>, <4 x float>* undef, align 16
%vecext10631 = extractelement <4 x float> %tmp1040, i32 2
%tmp1041 = load <4 x float>, <4 x float>* undef, align 16
@@ -6261,8 +6261,8 @@ entry:
%add10635 = fadd float %vecext10634, 0xC067533340000000
%tmp1043 = load <4 x float>, <4 x float>* undef, align 16
%vecins10636 = insertelement <4 x float> %tmp1043, float %add10635, i32 3
- store <4 x float> %vecins10636, <4 x float>* undef, align 16
- store <4 x float> <float 1.950000e+02, float 0x407E8E6660000000, float 0x407D7CCCC0000000, float 0x407E166660000000>, <4 x float>* %.compoundliteral10637
+ store volatile <4 x float> %vecins10636, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 1.950000e+02, float 0x407E8E6660000000, float 0x407D7CCCC0000000, float 0x407E166660000000>, <4 x float>* %.compoundliteral10637
%tmp1044 = load <4 x float>, <4 x float>* undef, align 16
%add10638 = fadd <4 x float> %tmp1044, undef
%tmp1045 = load <4 x float>, <4 x float>* undef, align 16
@@ -6270,94 +6270,94 @@ entry:
%add10640 = fadd float %vecext10639, 0x406CA33340000000
%tmp1046 = load <4 x float>, <4 x float>* undef, align 16
%vecins10641 = insertelement <4 x float> %tmp1046, float %add10640, i32 0
- store <4 x float> %vecins10641, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10641, <4 x float>* undef, align 16
%tmp1047 = load <4 x float>, <4 x float>* undef, align 16
%vecext10642 = extractelement <4 x float> %tmp1047, i32 1
%add10643 = fadd float %vecext10642, 0xC07C8999A0000000
%tmp1048 = load <4 x float>, <4 x float>* undef, align 16
%vecins10644 = insertelement <4 x float> %tmp1048, float %add10643, i32 1
- store <4 x float> %vecins10644, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10644, <4 x float>* undef, align 16
%tmp1049 = load <4 x float>, <4 x float>* undef, align 16
%vecext10645 = extractelement <4 x float> %tmp1049, i32 2
%tmp1050 = load <4 x float>, <4 x float>* undef, align 16
%tmp1051 = load <4 x float>, <4 x float>* undef, align 16
- %vecins10748 = insertelement <4 x float> undef, float undef, i32 3
+ %vecins10748 = insertelement <4 x float> undef, float %val, i32 3
%tmp1052 = load <4 x float>, <4 x float>* %.compoundliteral10749
%add10750 = fadd <4 x float> undef, %tmp1052
- store <4 x float> %add10750, <4 x float>* undef, align 16
+ store volatile <4 x float> %add10750, <4 x float>* undef, align 16
%tmp1053 = load <4 x float>, <4 x float>* undef, align 16
%vecext10751 = extractelement <4 x float> %tmp1053, i32 0
%add10752 = fadd float %vecext10751, 0x4071B33340000000
%tmp1054 = load <4 x float>, <4 x float>* undef, align 16
%vecins10753 = insertelement <4 x float> %tmp1054, float %add10752, i32 0
- store <4 x float> %vecins10753, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10753, <4 x float>* undef, align 16
%tmp1055 = load <4 x float>, <4 x float>* undef, align 16
%vecext10754 = extractelement <4 x float> %tmp1055, i32 1
%add10755 = fadd float %vecext10754, 0xC076A66660000000
%tmp1056 = load <4 x float>, <4 x float>* undef, align 16
%vecins10756 = insertelement <4 x float> %tmp1056, float %add10755, i32 1
- store <4 x float> %vecins10756, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10756, <4 x float>* undef, align 16
%tmp1057 = load <4 x float>, <4 x float>* undef, align 16
%vecext10757 = extractelement <4 x float> %tmp1057, i32 2
%add10758 = fadd float %vecext10757, 3.800000e+01
%tmp1058 = load <4 x float>, <4 x float>* undef, align 16
%vecins10759 = insertelement <4 x float> %tmp1058, float %add10758, i32 2
- store <4 x float> %vecins10759, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10759, <4 x float>* undef, align 16
%tmp1059 = load <4 x float>, <4 x float>* undef, align 16
%vecext10760 = extractelement <4 x float> %tmp1059, i32 3
- store <4 x float> undef, <4 x float>* undef, align 16
- store <4 x float> <float 0xC075BB3340000000, float 0x4074D4CCC0000000, float 0xC07A466660000000, float 0xC0691CCCC0000000>, <4 x float>* %.compoundliteral10763
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0xC075BB3340000000, float 0x4074D4CCC0000000, float 0xC07A466660000000, float 0xC0691CCCC0000000>, <4 x float>* %.compoundliteral10763
%tmp1060 = load <4 x float>, <4 x float>* %.compoundliteral10763
%tmp1061 = load <4 x float>, <4 x float>* undef, align 16
%tmp1062 = load <4 x float>, <4 x float>* undef, align 16
- %add10985 = fadd float undef, 0x405E933340000000
+ %add10985 = fadd float %val, 0x405E933340000000
%tmp1063 = load <4 x float>, <4 x float>* undef, align 16
%vecins10986 = insertelement <4 x float> %tmp1063, float %add10985, i32 3
- store <4 x float> %vecins10986, <4 x float>* undef, align 16
- store <4 x float> <float 0xC0721E6660000000, float -4.180000e+02, float 0x406F366660000000, float 0xC055F999A0000000>, <4 x float>* %.compoundliteral10987
+ store volatile <4 x float> %vecins10986, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0xC0721E6660000000, float -4.180000e+02, float 0x406F366660000000, float 0xC055F999A0000000>, <4 x float>* %.compoundliteral10987
%tmp1064 = load <4 x float>, <4 x float>* %.compoundliteral10987
%tmp1065 = load <4 x float>, <4 x float>* undef, align 16
- %vecins10994 = insertelement <4 x float> %tmp1065, float undef, i32 1
+ %vecins10994 = insertelement <4 x float> %tmp1065, float %val, i32 1
%tmp1066 = load <4 x float>, <4 x float>* undef, align 16
%vecext10995 = extractelement <4 x float> %tmp1066, i32 2
%add10996 = fadd float %vecext10995, 0x406F9999A0000000
%tmp1067 = load <4 x float>, <4 x float>* undef, align 16
%vecins10997 = insertelement <4 x float> %tmp1067, float %add10996, i32 2
- store <4 x float> %vecins10997, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins10997, <4 x float>* undef, align 16
%tmp1068 = load <4 x float>, <4 x float>* undef, align 16
%vecext10998 = extractelement <4 x float> %tmp1068, i32 3
%add10999 = fadd float %vecext10998, -2.765000e+02
%tmp1069 = load <4 x float>, <4 x float>* undef, align 16
%vecins11000 = insertelement <4 x float> %tmp1069, float %add10999, i32 3
- store <4 x float> %vecins11000, <4 x float>* undef, align 16
- store <4 x float> <float 0x4078F999A0000000, float 0xC06D166660000000, float 0x40501999A0000000, float 0x406FC999A0000000>, <4 x float>* %.compoundliteral11001
+ store volatile <4 x float> %vecins11000, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x4078F999A0000000, float 0xC06D166660000000, float 0x40501999A0000000, float 0x406FC999A0000000>, <4 x float>* %.compoundliteral11001
%tmp1070 = load <4 x float>, <4 x float>* undef, align 16
%add11002 = fadd <4 x float> %tmp1070, undef
%vecext11003 = extractelement <4 x float> undef, i32 0
%vecext11009 = extractelement <4 x float> undef, i32 2
%tmp1071 = load <4 x float>, <4 x float>* undef, align 16
- %vecins11033 = insertelement <4 x float> %tmp1071, float undef, i32 0
- store <4 x float> %vecins11033, <4 x float>* undef, align 16
+ %vecins11033 = insertelement <4 x float> %tmp1071, float %val, i32 0
+ store volatile <4 x float> %vecins11033, <4 x float>* undef, align 16
%tmp1072 = load <4 x float>, <4 x float>* undef, align 16
%vecext11034 = extractelement <4 x float> %tmp1072, i32 1
%add11035 = fadd float %vecext11034, 0x4056D33340000000
%tmp1073 = load <4 x float>, <4 x float>* undef, align 16
%vecins11036 = insertelement <4 x float> %tmp1073, float %add11035, i32 1
- store <4 x float> %vecins11036, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins11036, <4 x float>* undef, align 16
%tmp1074 = load <4 x float>, <4 x float>* undef, align 16
%vecext11037 = extractelement <4 x float> %tmp1074, i32 2
%add11038 = fadd float %vecext11037, 0xC06EA33340000000
%tmp1075 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1076 = load <4 x float>, <4 x float>* undef, align 16
%vecext11040 = extractelement <4 x float> %tmp1076, i32 3
%add11041 = fadd float %vecext11040, 0x40746CCCC0000000
%tmp1077 = load <4 x float>, <4 x float>* undef, align 16
%vecins11042 = insertelement <4 x float> %tmp1077, float %add11041, i32 3
- store <4 x float> <float 0x405DD999A0000000, float -3.775000e+02, float -1.265000e+02, float 0xC065C66660000000>, <4 x float>* undef
+ store volatile <4 x float> <float 0x405DD999A0000000, float -3.775000e+02, float -1.265000e+02, float 0xC065C66660000000>, <4 x float>* undef
%tmp1078 = load <4 x float>, <4 x float>* undef, align 16
%add11044 = fadd <4 x float> %tmp1078, undef
- store <4 x float> %add11044, <4 x float>* undef, align 16
+ store volatile <4 x float> %add11044, <4 x float>* undef, align 16
%tmp1079 = load <4 x float>, <4 x float>* undef, align 16
%vecext11045 = extractelement <4 x float> %tmp1079, i32 0
%add11046 = fadd float %vecext11045, 0xC076E66660000000
@@ -6366,58 +6366,58 @@ entry:
%tmp1081 = load <4 x float>, <4 x float>* undef, align 16
%vecext11048 = extractelement <4 x float> %tmp1081, i32 1
%add11049 = fadd float %vecext11048, 4.100000e+02
- %vecins11064 = insertelement <4 x float> undef, float undef, i32 1
- %add11074 = fadd float undef, 0xC06FF999A0000000
+ %vecins11064 = insertelement <4 x float> undef, float %val, i32 1
+ %add11074 = fadd float %val, 0xC06FF999A0000000
%tmp1082 = load <4 x float>, <4 x float>* undef, align 16
%vecins11075 = insertelement <4 x float> %tmp1082, float %add11074, i32 0
- store <4 x float> %vecins11075, <4 x float>* undef, align 16
- %add11077 = fadd float undef, 0xC075D33340000000
+ store volatile <4 x float> %vecins11075, <4 x float>* undef, align 16
+ %add11077 = fadd float %val, 0xC075D33340000000
%tmp1083 = load <4 x float>, <4 x float>* undef, align 16
%tmp1084 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1085 = load <4 x float>, <4 x float>* undef, align 16
%vecext11093 = extractelement <4 x float> %tmp1085, i32 2
%add11094 = fadd float %vecext11093, 0xC07CD66660000000
%tmp1086 = load <4 x float>, <4 x float>* undef, align 16
%vecins11095 = insertelement <4 x float> %tmp1086, float %add11094, i32 2
- store <4 x float> %vecins11095, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
- store <4 x float> <float 0x4061F66660000000, float 0xC076DB3340000000, float 0xC055A66660000000, float 2.415000e+02>, <4 x float>* undef
+ store volatile <4 x float> %vecins11095, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x4061F66660000000, float 0xC076DB3340000000, float 0xC055A66660000000, float 2.415000e+02>, <4 x float>* undef
%tmp1087 = load <4 x float>, <4 x float>* undef
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1088 = load <4 x float>, <4 x float>* undef, align 16
%vecext11513 = extractelement <4 x float> %tmp1088, i32 2
%add11514 = fadd float %vecext11513, 0xC07C7199A0000000
%vecins11515 = insertelement <4 x float> undef, float %add11514, i32 2
- store <4 x float> %vecins11515, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins11515, <4 x float>* undef, align 16
%add11520 = fadd <4 x float> undef, undef
- store <4 x float> %add11520, <4 x float>* undef, align 16
+ store volatile <4 x float> %add11520, <4 x float>* undef, align 16
%vecext11521 = extractelement <4 x float> undef, i32 0
%add11522 = fadd float %vecext11521, 0x4041733340000000
%tmp1089 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1090 = load <4 x float>, <4 x float>* undef
%tmp1091 = load <4 x float>, <4 x float>* undef, align 16
%add11562 = fadd <4 x float> %tmp1091, %tmp1090
%tmp1092 = load <4 x float>, <4 x float>* undef, align 16
- %add11564 = fadd float undef, 0xC0411999A0000000
+ %add11564 = fadd float %val, 0xC0411999A0000000
%tmp1093 = load <4 x float>, <4 x float>* undef, align 16
%vecins11565 = insertelement <4 x float> %tmp1093, float %add11564, i32 0
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%vecext11586 = extractelement <4 x float> undef, i32 3
%add11587 = fadd float %vecext11586, 3.760000e+02
%tmp1094 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
- store <4 x float> <float 0xC06ED999A0000000, float 1.380000e+02, float 0xC073AB3340000000, float 0x4078A66660000000>, <4 x float>* undef
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0xC06ED999A0000000, float 1.380000e+02, float 0xC073AB3340000000, float 0x4078A66660000000>, <4 x float>* undef
%tmp1095 = load <4 x float>, <4 x float>* undef
%tmp1096 = load <4 x float>, <4 x float>* undef, align 16
%tmp1097 = load <4 x float>, <4 x float>* undef, align 16
%tmp1098 = load <4 x float>, <4 x float>* undef, align 16
- %vecins11593 = insertelement <4 x float> %tmp1098, float undef, i32 0
+ %vecins11593 = insertelement <4 x float> %tmp1098, float %val, i32 0
%vecext11594 = extractelement <4 x float> undef, i32 1
%tmp1099 = load <4 x float>, <4 x float>* undef, align 16
- %vecins11596 = insertelement <4 x float> %tmp1099, float undef, i32 1
- store <4 x float> %vecins11596, <4 x float>* undef, align 16
+ %vecins11596 = insertelement <4 x float> %tmp1099, float %val, i32 1
+ store volatile <4 x float> %vecins11596, <4 x float>* undef, align 16
%tmp1100 = load <4 x float>, <4 x float>* undef, align 16
%vecext11597 = extractelement <4 x float> %tmp1100, i32 2
%add11598 = fadd float %vecext11597, 0x40430CCCC0000000
@@ -6426,34 +6426,34 @@ entry:
%tmp1102 = load <4 x float>, <4 x float>* undef, align 16
%vecext11600 = extractelement <4 x float> %tmp1102, i32 3
%tmp1103 = load <4 x float>, <4 x float>* undef, align 16
- %vecins11602 = insertelement <4 x float> %tmp1103, float undef, i32 3
- store <4 x float> %vecins11602, <4 x float>* undef, align 16
+ %vecins11602 = insertelement <4 x float> %tmp1103, float %val, i32 3
+ store volatile <4 x float> %vecins11602, <4 x float>* undef, align 16
%tmp1104 = load <4 x float>, <4 x float>* undef
%tmp1105 = load <4 x float>, <4 x float>* undef, align 16
%add11604 = fadd <4 x float> %tmp1105, %tmp1104
%tmp1106 = load <4 x float>, <4 x float>* undef, align 16
%vecext11605 = extractelement <4 x float> %tmp1106, i32 0
%tmp1107 = load <4 x float>, <4 x float>* undef, align 16
- %vecins11607 = insertelement <4 x float> %tmp1107, float undef, i32 0
- %vecins11621 = insertelement <4 x float> undef, float undef, i32 0
- %vecins11630 = insertelement <4 x float> undef, float undef, i32 3
- store <4 x float> %vecins11630, <4 x float>* undef, align 16
- store <4 x float> <float -1.190000e+02, float 0x402F666660000000, float 0xC07BD33340000000, float -1.595000e+02>, <4 x float>* %.compoundliteral11631
+ %vecins11607 = insertelement <4 x float> %tmp1107, float %val, i32 0
+ %vecins11621 = insertelement <4 x float> undef, float %val, i32 0
+ %vecins11630 = insertelement <4 x float> undef, float %val, i32 3
+ store volatile <4 x float> %vecins11630, <4 x float>* undef, align 16
+ store volatile <4 x float> <float -1.190000e+02, float 0x402F666660000000, float 0xC07BD33340000000, float -1.595000e+02>, <4 x float>* %.compoundliteral11631
%tmp1108 = load <4 x float>, <4 x float>* %.compoundliteral11631
%tmp1109 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
- %add11634 = fadd float undef, -1.075000e+02
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ %add11634 = fadd float %val, -1.075000e+02
%vecext11647 = extractelement <4 x float> undef, i32 0
%add11648 = fadd float %vecext11647, 0x40775999A0000000
%tmp1110 = load <4 x float>, <4 x float>* undef, align 16
%vecext11650 = extractelement <4 x float> undef, i32 1
%tmp1111 = load <4 x float>, <4 x float>* undef, align 16
- %vecins11784 = insertelement <4 x float> %tmp1111, float undef, i32 3
- store <4 x float> %vecins11784, <4 x float>* undef, align 16
- store <4 x float> <float 1.605000e+02, float 0x4068366660000000, float 2.820000e+02, float 0x407CF66660000000>, <4 x float>* %.compoundliteral11785
+ %vecins11784 = insertelement <4 x float> %tmp1111, float %val, i32 3
+ store volatile <4 x float> %vecins11784, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 1.605000e+02, float 0x4068366660000000, float 2.820000e+02, float 0x407CF66660000000>, <4 x float>* %.compoundliteral11785
%tmp1112 = load <4 x float>, <4 x float>* %.compoundliteral11785
%add11786 = fadd <4 x float> undef, %tmp1112
- store <4 x float> %add11786, <4 x float>* undef, align 16
+ store volatile <4 x float> %add11786, <4 x float>* undef, align 16
%tmp1113 = load <4 x float>, <4 x float>* undef, align 16
%vecext11787 = extractelement <4 x float> %tmp1113, i32 0
%vecext11807 = extractelement <4 x float> undef, i32 2
@@ -6463,60 +6463,60 @@ entry:
%add11811 = fadd float %vecext11810, 0x4068F66660000000
%tmp1115 = load <4 x float>, <4 x float>* undef, align 16
%vecins11812 = insertelement <4 x float> %tmp1115, float %add11811, i32 3
- store <4 x float> %vecins11812, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins11812, <4 x float>* undef, align 16
%tmp1116 = load <4 x float>, <4 x float>* undef
%tmp1117 = load <4 x float>, <4 x float>* undef, align 16
%vecext11958 = extractelement <4 x float> undef, i32 1
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%vecext11961 = extractelement <4 x float> undef, i32 2
%add11962 = fadd float %vecext11961, -3.680000e+02
%tmp1118 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
- %add11965 = fadd float undef, 0x4061133340000000
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ %add11965 = fadd float %val, 0x4061133340000000
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1119 = load <4 x float>, <4 x float>* undef, align 16
%vecext11975 = extractelement <4 x float> %tmp1119, i32 2
%tmp1120 = load <4 x float>, <4 x float>* undef, align 16
- %vecins11977 = insertelement <4 x float> %tmp1120, float undef, i32 2
- store <4 x float> %vecins11977, <4 x float>* undef, align 16
+ %vecins11977 = insertelement <4 x float> %tmp1120, float %val, i32 2
+ store volatile <4 x float> %vecins11977, <4 x float>* undef, align 16
%vecext11978 = extractelement <4 x float> undef, i32 3
%add11979 = fadd float %vecext11978, 0xC0688999A0000000
%tmp1121 = load <4 x float>, <4 x float>* undef, align 16
%vecins11980 = insertelement <4 x float> %tmp1121, float %add11979, i32 3
- store <4 x float> %vecins11980, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins11980, <4 x float>* undef, align 16
%add11982 = fadd <4 x float> undef, undef
- store <4 x float> %add11982, <4 x float>* undef, align 16
+ store volatile <4 x float> %add11982, <4 x float>* undef, align 16
%tmp1122 = load <4 x float>, <4 x float>* undef, align 16
%vecext11983 = extractelement <4 x float> %tmp1122, i32 0
%add11984 = fadd float %vecext11983, 0xC075966660000000
%tmp1123 = load <4 x float>, <4 x float>* undef, align 16
- %vecins12005 = insertelement <4 x float> undef, float undef, i32 2
- store <4 x float> %vecins12005, <4 x float>* undef, align 16
+ %vecins12005 = insertelement <4 x float> undef, float %val, i32 2
+ store volatile <4 x float> %vecins12005, <4 x float>* undef, align 16
%tmp1124 = load <4 x float>, <4 x float>* undef, align 16
- %add12007 = fadd float undef, 0xC07124CCC0000000
+ %add12007 = fadd float %val, 0xC07124CCC0000000
%vecins12008 = insertelement <4 x float> undef, float %add12007, i32 3
- store <4 x float> %vecins12008, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins12008, <4 x float>* undef, align 16
%tmp1125 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1126 = load <4 x float>, <4 x float>* undef, align 16
- %add12012 = fadd float undef, 0xC0750CCCC0000000
+ %add12012 = fadd float %val, 0xC0750CCCC0000000
%tmp1127 = load <4 x float>, <4 x float>* undef, align 16
%vecins12013 = insertelement <4 x float> %tmp1127, float %add12012, i32 0
- store <4 x float> %vecins12013, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins12013, <4 x float>* undef, align 16
%tmp1128 = load <4 x float>, <4 x float>* undef, align 16
- %add12015 = fadd float undef, 0x4079CE6660000000
+ %add12015 = fadd float %val, 0x4079CE6660000000
%tmp1129 = load <4 x float>, <4 x float>* undef, align 16
%vecins12016 = insertelement <4 x float> %tmp1129, float %add12015, i32 1
- store <4 x float> %vecins12016, <4 x float>* undef, align 16
- %add12018 = fadd float undef, 3.555000e+02
+ store volatile <4 x float> %vecins12016, <4 x float>* undef, align 16
+ %add12018 = fadd float %val, 3.555000e+02
%tmp1130 = load <4 x float>, <4 x float>* undef, align 16
%vecins12019 = insertelement <4 x float> %tmp1130, float %add12018, i32 2
%tmp1131 = load <4 x float>, <4 x float>* undef, align 16
%vecext12020 = extractelement <4 x float> %tmp1131, i32 3
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%vecext12028 = extractelement <4 x float> undef, i32 1
- store <4 x float> undef, <4 x float>* undef, align 16
- store <4 x float> <float 0x40791999A0000000, float 0x407C7CCCC0000000, float 0x4070F33340000000, float 0xC056ECCCC0000000>, <4 x float>* undef
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x40791999A0000000, float 0x407C7CCCC0000000, float 0x4070F33340000000, float 0xC056ECCCC0000000>, <4 x float>* undef
%tmp1132 = load <4 x float>, <4 x float>* undef, align 16
%add12038 = fadd <4 x float> %tmp1132, undef
%tmp1133 = load <4 x float>, <4 x float>* undef, align 16
@@ -6524,27 +6524,27 @@ entry:
%add12043 = fadd float %vecext12042, 0x402F9999A0000000
%tmp1134 = load <4 x float>, <4 x float>* undef, align 16
%vecins12044 = insertelement <4 x float> %tmp1134, float %add12043, i32 1
- store <4 x float> %vecins12044, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins12044, <4 x float>* undef, align 16
%vecext12045 = extractelement <4 x float> undef, i32 2
%add12046 = fadd float %vecext12045, 0xC07EF33340000000
%tmp1135 = load <4 x float>, <4 x float>* undef, align 16
%vecins12047 = insertelement <4 x float> %tmp1135, float %add12046, i32 2
- store <4 x float> %vecins12047, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins12047, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1136 = load <4 x float>, <4 x float>* undef, align 16
%vecext12112 = extractelement <4 x float> %tmp1136, i32 1
%tmp1137 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
- %add12116 = fadd float undef, 0xC074F4CCC0000000
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ %add12116 = fadd float %val, 0xC074F4CCC0000000
%tmp1138 = load <4 x float>, <4 x float>* undef, align 16
%vecins12117 = insertelement <4 x float> %tmp1138, float %add12116, i32 2
- store <4 x float> %vecins12117, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins12117, <4 x float>* undef, align 16
%tmp1139 = load <4 x float>, <4 x float>* undef, align 16
%vecext12118 = extractelement <4 x float> %tmp1139, i32 3
%add12119 = fadd float %vecext12118, 0xC0638CCCC0000000
%tmp1140 = load <4 x float>, <4 x float>* undef, align 16
%vecins12120 = insertelement <4 x float> %tmp1140, float %add12119, i32 3
- %add12152 = fadd float undef, 0x4039333340000000
+ %add12152 = fadd float %val, 0x4039333340000000
%tmp1141 = load <4 x float>, <4 x float>* undef, align 16
%vecins12153 = insertelement <4 x float> %tmp1141, float %add12152, i32 0
%vecext12154 = extractelement <4 x float> undef, i32 1
@@ -6561,67 +6561,67 @@ entry:
%add12161 = fadd float %vecext12160, 0x407B1999A0000000
%tmp1146 = load <4 x float>, <4 x float>* undef, align 16
%vecins12162 = insertelement <4 x float> %tmp1146, float %add12161, i32 3
- store <4 x float> %vecins12162, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins12162, <4 x float>* undef, align 16
%tmp1147 = load <4 x float>, <4 x float>* undef
%tmp1148 = load <4 x float>, <4 x float>* undef, align 16
%tmp1149 = load <4 x float>, <4 x float>* undef, align 16
%vecext12182 = extractelement <4 x float> %tmp1149, i32 1
%tmp1150 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
- store <4 x float> <float 0x4061833340000000, float 0x405CA66660000000, float -1.275000e+02, float 0x405BC66660000000>, <4 x float>* undef
- %add12208 = fadd float undef, 0x407854CCC0000000
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x4061833340000000, float 0x405CA66660000000, float -1.275000e+02, float 0x405BC66660000000>, <4 x float>* undef
+ %add12208 = fadd float %val, 0x407854CCC0000000
%tmp1151 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1152 = load <4 x float>, <4 x float>* undef, align 16
%tmp1153 = load <4 x float>, <4 x float>* undef, align 16
- %vecins12218 = insertelement <4 x float> undef, float undef, i32 3
- store <4 x float> %vecins12218, <4 x float>* undef, align 16
- store <4 x float> <float 0x407C3CCCC0000000, float 0xC057C66660000000, float 2.605000e+02, float 0xC07974CCC0000000>, <4 x float>* undef
+ %vecins12218 = insertelement <4 x float> undef, float %val, i32 3
+ store volatile <4 x float> %vecins12218, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x407C3CCCC0000000, float 0xC057C66660000000, float 2.605000e+02, float 0xC07974CCC0000000>, <4 x float>* undef
%tmp1154 = load <4 x float>, <4 x float>* undef
%tmp1155 = load <4 x float>, <4 x float>* undef, align 16
%add12220 = fadd <4 x float> %tmp1155, %tmp1154
%tmp1156 = load <4 x float>, <4 x float>* undef, align 16
%tmp1157 = load <4 x float>, <4 x float>* undef, align 16
- %vecins12223 = insertelement <4 x float> %tmp1157, float undef, i32 0
- store <4 x float> %vecins12223, <4 x float>* undef, align 16
+ %vecins12223 = insertelement <4 x float> %tmp1157, float %val, i32 0
+ store volatile <4 x float> %vecins12223, <4 x float>* undef, align 16
%tmp1158 = load <4 x float>, <4 x float>* undef, align 16
- %add12242 = fadd float undef, 0x4067E33340000000
+ %add12242 = fadd float %val, 0x4067E33340000000
%tmp1159 = load <4 x float>, <4 x float>* undef, align 16
%vecins12243 = insertelement <4 x float> %tmp1159, float %add12242, i32 2
- store <4 x float> %vecins12243, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins12243, <4 x float>* undef, align 16
%tmp1160 = load <4 x float>, <4 x float>* undef, align 16
%vecext12244 = extractelement <4 x float> %tmp1160, i32 3
%add12245 = fadd float %vecext12244, 0x4071AE6660000000
%tmp1161 = load <4 x float>, <4 x float>* undef, align 16
%vecins12246 = insertelement <4 x float> %tmp1161, float %add12245, i32 3
- store <4 x float> %vecins12246, <4 x float>* undef, align 16
- store <4 x float> <float -4.880000e+02, float 0xC079966660000000, float -8.450000e+01, float 0xC0464CCCC0000000>, <4 x float>* %.compoundliteral12247
+ store volatile <4 x float> %vecins12246, <4 x float>* undef, align 16
+ store volatile <4 x float> <float -4.880000e+02, float 0xC079966660000000, float -8.450000e+01, float 0xC0464CCCC0000000>, <4 x float>* %.compoundliteral12247
%tmp1162 = load <4 x float>, <4 x float>* %.compoundliteral12247
%tmp1163 = load <4 x float>, <4 x float>* undef, align 16
%add12248 = fadd <4 x float> %tmp1163, %tmp1162
- store <4 x float> %add12248, <4 x float>* undef, align 16
+ store volatile <4 x float> %add12248, <4 x float>* undef, align 16
%tmp1164 = load <4 x float>, <4 x float>* undef, align 16
%vecext12249 = extractelement <4 x float> %tmp1164, i32 0
%add12250 = fadd float %vecext12249, 1.075000e+02
%tmp1165 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1166 = load <4 x float>, <4 x float>* undef, align 16
%vecext12252 = extractelement <4 x float> %tmp1166, i32 1
%add12253 = fadd float %vecext12252, 0xC0662CCCC0000000
%tmp1167 = load <4 x float>, <4 x float>* undef, align 16
%vecins12254 = insertelement <4 x float> %tmp1167, float %add12253, i32 1
- store <4 x float> %vecins12254, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins12254, <4 x float>* undef, align 16
%tmp1168 = load <4 x float>, <4 x float>* undef, align 16
%vecext12255 = extractelement <4 x float> %tmp1168, i32 2
%add12256 = fadd float %vecext12255, 0x40554CCCC0000000
- store <4 x float> undef, <4 x float>* undef, align 16
- %add13141 = fadd float undef, 0x40768999A0000000
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ %add13141 = fadd float %val, 0x40768999A0000000
%tmp1169 = load <4 x float>, <4 x float>* undef, align 16
%vecins13142 = insertelement <4 x float> %tmp1169, float %add13141, i32 3
- store <4 x float> %vecins13142, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins13142, <4 x float>* undef, align 16
%tmp1170 = load <4 x float>, <4 x float>* undef
%add13144 = fadd <4 x float> undef, %tmp1170
- store <4 x float> %add13144, <4 x float>* undef, align 16
+ store volatile <4 x float> %add13144, <4 x float>* undef, align 16
%tmp1171 = load <4 x float>, <4 x float>* undef, align 16
%vecext13145 = extractelement <4 x float> %tmp1171, i32 0
%add13146 = fadd float %vecext13145, 3.975000e+02
@@ -6630,137 +6630,137 @@ entry:
%add13379 = fadd float %vecext13378, 0xC053B33340000000
%tmp1173 = load <4 x float>, <4 x float>* undef, align 16
%vecins13380 = insertelement <4 x float> %tmp1173, float %add13379, i32 3
- store <4 x float> %vecins13380, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins13380, <4 x float>* undef, align 16
%tmp1174 = load <4 x float>, <4 x float>* undef, align 16
- %vecins13408 = insertelement <4 x float> %tmp1174, float undef, i32 3
- store <4 x float> %vecins13408, <4 x float>* undef, align 16
- store <4 x float> <float 0xC0455999A0000000, float 0xC07D366660000000, float 4.240000e+02, float -1.670000e+02>, <4 x float>* undef
+ %vecins13408 = insertelement <4 x float> %tmp1174, float %val, i32 3
+ store volatile <4 x float> %vecins13408, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0xC0455999A0000000, float 0xC07D366660000000, float 4.240000e+02, float -1.670000e+02>, <4 x float>* undef
%tmp1175 = load <4 x float>, <4 x float>* undef
%tmp1176 = load <4 x float>, <4 x float>* undef, align 16
%add13410 = fadd <4 x float> %tmp1176, %tmp1175
- store <4 x float> %add13410, <4 x float>* undef, align 16
+ store volatile <4 x float> %add13410, <4 x float>* undef, align 16
%tmp1177 = load <4 x float>, <4 x float>* undef, align 16
- %add13412 = fadd float undef, 0xC0708999A0000000
+ %add13412 = fadd float %val, 0xC0708999A0000000
%tmp1178 = load <4 x float>, <4 x float>* undef, align 16
%vecins13413 = insertelement <4 x float> %tmp1178, float %add13412, i32 0
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%vecext13428 = extractelement <4 x float> undef, i32 1
%add13429 = fadd float %vecext13428, 0xC063BCCCC0000000
%tmp1179 = load <4 x float>, <4 x float>* undef, align 16
%vecins13430 = insertelement <4 x float> %tmp1179, float %add13429, i32 1
- store <4 x float> %vecins13430, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins13430, <4 x float>* undef, align 16
%tmp1180 = load <4 x float>, <4 x float>* undef, align 16
%vecext13431 = extractelement <4 x float> %tmp1180, i32 2
- %vecins13433 = insertelement <4 x float> undef, float undef, i32 2
- store <4 x float> undef, <4 x float>* undef, align 16
- %add13449 = fadd float undef, 4.590000e+02
+ %vecins13433 = insertelement <4 x float> undef, float %val, i32 2
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ %add13449 = fadd float %val, 4.590000e+02
%tmp1181 = load <4 x float>, <4 x float>* undef, align 16
%vecins13450 = insertelement <4 x float> %tmp1181, float %add13449, i32 3
- store <4 x float> %vecins13450, <4 x float>* undef, align 16
- store <4 x float> <float 0xC073A66660000000, float 0xC041B33340000000, float 0x4066233340000000, float 0x4071C33340000000>, <4 x float>* undef
+ store volatile <4 x float> %vecins13450, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0xC073A66660000000, float 0xC041B33340000000, float 0x4066233340000000, float 0x4071C33340000000>, <4 x float>* undef
%tmp1182 = load <4 x float>, <4 x float>* undef
%tmp1183 = load <4 x float>, <4 x float>* undef, align 16
%add13452 = fadd <4 x float> %tmp1183, %tmp1182
- store <4 x float> %add13452, <4 x float>* undef, align 16
+ store volatile <4 x float> %add13452, <4 x float>* undef, align 16
%tmp1184 = load <4 x float>, <4 x float>* undef, align 16
%vecext13453 = extractelement <4 x float> %tmp1184, i32 0
%add13454 = fadd float %vecext13453, 0xC072866660000000
%tmp1185 = load <4 x float>, <4 x float>* undef, align 16
%vecins13455 = insertelement <4 x float> %tmp1185, float %add13454, i32 0
- %add13471 = fadd float undef, 0xC0556CCCC0000000
+ %add13471 = fadd float %val, 0xC0556CCCC0000000
%tmp1186 = load <4 x float>, <4 x float>* undef, align 16
%vecins13472 = insertelement <4 x float> %tmp1186, float %add13471, i32 1
- store <4 x float> %vecins13472, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins13472, <4 x float>* undef, align 16
%tmp1187 = load <4 x float>, <4 x float>* undef, align 16
%vecext13473 = extractelement <4 x float> %tmp1187, i32 2
%add13474 = fadd float %vecext13473, 0xC0786999A0000000
%tmp1188 = load <4 x float>, <4 x float>* undef, align 16
%vecins13475 = insertelement <4 x float> %tmp1188, float %add13474, i32 2
- store <4 x float> %vecins13475, <4 x float>* undef, align 16
- %add13477 = fadd float undef, 0xC07C3E6660000000
+ store volatile <4 x float> %vecins13475, <4 x float>* undef, align 16
+ %add13477 = fadd float %val, 0xC07C3E6660000000
%tmp1189 = load <4 x float>, <4 x float>* undef, align 16
%vecins13478 = insertelement <4 x float> %tmp1189, float %add13477, i32 3
- store <4 x float> %vecins13478, <4 x float>* undef, align 16
- store <4 x float> <float -4.740000e+02, float 0x4023CCCCC0000000, float 0xC05C266660000000, float 0x407B7199A0000000>, <4 x float>* undef
+ store volatile <4 x float> %vecins13478, <4 x float>* undef, align 16
+ store volatile <4 x float> <float -4.740000e+02, float 0x4023CCCCC0000000, float 0xC05C266660000000, float 0x407B7199A0000000>, <4 x float>* undef
%tmp1190 = load <4 x float>, <4 x float>* undef, align 16
%add13480 = fadd <4 x float> %tmp1190, undef
- store <4 x float> %add13480, <4 x float>* undef, align 16
+ store volatile <4 x float> %add13480, <4 x float>* undef, align 16
%tmp1191 = load <4 x float>, <4 x float>* undef, align 16
%vecext13481 = extractelement <4 x float> %tmp1191, i32 0
%add13482 = fadd float %vecext13481, 0xC07BA4CCC0000000
%tmp1192 = load <4 x float>, <4 x float>* undef, align 16
%vecins13483 = insertelement <4 x float> %tmp1192, float %add13482, i32 0
- store <4 x float> %vecins13483, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins13483, <4 x float>* undef, align 16
%tmp1193 = load <4 x float>, <4 x float>* undef, align 16
- %add13485 = fadd float undef, 0x406B1999A0000000
+ %add13485 = fadd float %val, 0x406B1999A0000000
%tmp1194 = load <4 x float>, <4 x float>* undef, align 16
%vecins13486 = insertelement <4 x float> %tmp1194, float %add13485, i32 1
- store <4 x float> %vecins13486, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins13486, <4 x float>* undef, align 16
%tmp1195 = load <4 x float>, <4 x float>* undef, align 16
%vecext13487 = extractelement <4 x float> %tmp1195, i32 2
%add13488 = fadd float %vecext13487, 0x40647999A0000000
%tmp1196 = load <4 x float>, <4 x float>* undef, align 16
%vecins13489 = insertelement <4 x float> %tmp1196, float %add13488, i32 2
- store <4 x float> %vecins13489, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins13489, <4 x float>* undef, align 16
%tmp1197 = load <4 x float>, <4 x float>* undef, align 16
%vecext13490 = extractelement <4 x float> %tmp1197, i32 3
%tmp1198 = load <4 x float>, <4 x float>* undef, align 16
- %vecins13492 = insertelement <4 x float> %tmp1198, float undef, i32 3
- store <4 x float> %vecins13492, <4 x float>* undef, align 16
+ %vecins13492 = insertelement <4 x float> %tmp1198, float %val, i32 3
+ store volatile <4 x float> %vecins13492, <4 x float>* undef, align 16
%tmp1199 = load <4 x float>, <4 x float>* %.compoundliteral13493
%tmp1200 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
- %vecins13548 = insertelement <4 x float> undef, float undef, i32 3
- store <4 x float> <float 4.540000e+02, float 3.760000e+02, float 0x406EA33340000000, float 0x405AACCCC0000000>, <4 x float>* %.compoundliteral13549
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
+ %vecins13548 = insertelement <4 x float> undef, float %val, i32 3
+ store volatile <4 x float> <float 4.540000e+02, float 3.760000e+02, float 0x406EA33340000000, float 0x405AACCCC0000000>, <4 x float>* %.compoundliteral13549
%tmp1201 = load <4 x float>, <4 x float>* undef, align 16
- %add13552 = fadd float undef, 3.230000e+02
+ %add13552 = fadd float %val, 3.230000e+02
%tmp1202 = load <4 x float>, <4 x float>* undef, align 16
%vecins13553 = insertelement <4 x float> %tmp1202, float %add13552, i32 0
%tmp1203 = load <4 x float>, <4 x float>* undef, align 16
%vecext13554 = extractelement <4 x float> %tmp1203, i32 1
%tmp1204 = load <4 x float>, <4 x float>* undef, align 16
- %vecins13556 = insertelement <4 x float> %tmp1204, float undef, i32 1
- store <4 x float> %vecins13556, <4 x float>* undef, align 16
+ %vecins13556 = insertelement <4 x float> %tmp1204, float %val, i32 1
+ store volatile <4 x float> %vecins13556, <4 x float>* undef, align 16
%tmp1205 = load <4 x float>, <4 x float>* undef, align 16
- %add13558 = fadd float undef, 2.625000e+02
+ %add13558 = fadd float %val, 2.625000e+02
%tmp1206 = load <4 x float>, <4 x float>* undef, align 16
%vecins13559 = insertelement <4 x float> %tmp1206, float %add13558, i32 2
- store <4 x float> %vecins13559, <4 x float>* undef, align 16
- %add13575 = fadd float undef, -4.725000e+02
+ store volatile <4 x float> %vecins13559, <4 x float>* undef, align 16
+ %add13575 = fadd float %val, -4.725000e+02
%tmp1207 = load <4 x float>, <4 x float>* undef, align 16
%vecins13576 = insertelement <4 x float> %tmp1207, float %add13575, i32 3
- store <4 x float> %vecins13576, <4 x float>* undef, align 16
- store <4 x float> <float 0x40334CCCC0000000, float 0xC0785CCCC0000000, float 0xC078D66660000000, float 3.745000e+02>, <4 x float>* undef
+ store volatile <4 x float> %vecins13576, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x40334CCCC0000000, float 0xC0785CCCC0000000, float 0xC078D66660000000, float 3.745000e+02>, <4 x float>* undef
%tmp1208 = load <4 x float>, <4 x float>* undef
%tmp1209 = load <4 x float>, <4 x float>* undef, align 16
%add13578 = fadd <4 x float> %tmp1209, %tmp1208
- store <4 x float> %add13578, <4 x float>* undef, align 16
+ store volatile <4 x float> %add13578, <4 x float>* undef, align 16
%tmp1210 = load <4 x float>, <4 x float>* undef, align 16
%tmp1211 = load <4 x float>, <4 x float>* undef, align 16
%add13592 = fadd <4 x float> %tmp1211, undef
- store <4 x float> %add13592, <4 x float>* undef, align 16
+ store volatile <4 x float> %add13592, <4 x float>* undef, align 16
%tmp1212 = load <4 x float>, <4 x float>* undef, align 16
%vecext13593 = extractelement <4 x float> %tmp1212, i32 0
%add13594 = fadd float %vecext13593, 0xC0708B3340000000
%tmp1213 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1214 = load <4 x float>, <4 x float>* undef, align 16
%vecext13596 = extractelement <4 x float> %tmp1214, i32 1
%add13597 = fadd float %vecext13596, 0x40660999A0000000
- %vecins13604 = insertelement <4 x float> undef, float undef, i32 3
- store <4 x float> %vecins13604, <4 x float>* undef, align 16
- store <4 x float> <float 0x407B4999A0000000, float 0xC067F66660000000, float 0xC068F999A0000000, float 0xC079233340000000>, <4 x float>* undef
+ %vecins13604 = insertelement <4 x float> undef, float %val, i32 3
+ store volatile <4 x float> %vecins13604, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x407B4999A0000000, float 0xC067F66660000000, float 0xC068F999A0000000, float 0xC079233340000000>, <4 x float>* undef
%tmp1215 = load <4 x float>, <4 x float>* undef, align 16
%add13606 = fadd <4 x float> %tmp1215, undef
%tmp1216 = load <4 x float>, <4 x float>* undef, align 16
%vecext13607 = extractelement <4 x float> %tmp1216, i32 0
- %vecins13609 = insertelement <4 x float> undef, float undef, i32 0
+ %vecins13609 = insertelement <4 x float> undef, float %val, i32 0
%tmp1217 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1218 = load <4 x float>, <4 x float>* undef, align 16
- %add13622 = fadd float undef, -3.390000e+02
+ %add13622 = fadd float %val, -3.390000e+02
%vecins13623 = insertelement <4 x float> undef, float %add13622, i32 0
- store <4 x float> %vecins13623, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins13623, <4 x float>* undef, align 16
%tmp1219 = load <4 x float>, <4 x float>* undef, align 16
%vecext13624 = extractelement <4 x float> %tmp1219, i32 1
%add13625 = fadd float %vecext13624, 0x405C3999A0000000
@@ -6772,41 +6772,41 @@ entry:
%add13631 = fadd float %vecext13630, 0xC060333340000000
%tmp1222 = load <4 x float>, <4 x float>* undef, align 16
%vecins13632 = insertelement <4 x float> %tmp1222, float %add13631, i32 3
- store <4 x float> %vecins13632, <4 x float>* undef, align 16
- store <4 x float> <float 0x4078D66660000000, float 0x4048B33340000000, float 0x4051466660000000, float -2.965000e+02>, <4 x float>* undef
+ store volatile <4 x float> %vecins13632, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x4078D66660000000, float 0x4048B33340000000, float 0x4051466660000000, float -2.965000e+02>, <4 x float>* undef
%tmp1223 = load <4 x float>, <4 x float>* undef
%tmp1224 = load <4 x float>, <4 x float>* undef, align 16
%add13634 = fadd <4 x float> %tmp1224, %tmp1223
- store <4 x float> %add13634, <4 x float>* undef, align 16
+ store volatile <4 x float> %add13634, <4 x float>* undef, align 16
%vecext13635 = extractelement <4 x float> undef, i32 0
%add13636 = fadd float %vecext13635, 0x406A5999A0000000
%tmp1225 = load <4 x float>, <4 x float>* undef, align 16
%vecins13637 = insertelement <4 x float> %tmp1225, float %add13636, i32 0
- store <4 x float> %vecins13637, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins13637, <4 x float>* undef, align 16
%tmp1226 = load <4 x float>, <4 x float>* undef, align 16
%tmp1227 = load <4 x float>, <4 x float>* undef, align 16
- %vecins13643 = insertelement <4 x float> %tmp1227, float undef, i32 2
- store <4 x float> undef, <4 x float>* undef, align 16
+ %vecins13643 = insertelement <4 x float> %tmp1227, float %val, i32 2
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1228 = load <4 x float>, <4 x float>* undef, align 16
- %add13785 = fadd float undef, 0x4068866660000000
+ %add13785 = fadd float %val, 0x4068866660000000
%tmp1229 = load <4 x float>, <4 x float>* undef, align 16
%vecins13786 = insertelement <4 x float> %tmp1229, float %add13785, i32 3
- store <4 x float> %vecins13786, <4 x float>* undef, align 16
- store <4 x float> <float 0x407704CCC0000000, float 0x4047B33340000000, float 0x40797B3340000000, float 0xC0652CCCC0000000>, <4 x float>* %.compoundliteral13787
+ store volatile <4 x float> %vecins13786, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x407704CCC0000000, float 0x4047B33340000000, float 0x40797B3340000000, float 0xC0652CCCC0000000>, <4 x float>* %.compoundliteral13787
%tmp1230 = load <4 x float>, <4 x float>* undef, align 16
%add13788 = fadd <4 x float> %tmp1230, undef
%tmp1231 = load <4 x float>, <4 x float>* undef
%tmp1232 = load <4 x float>, <4 x float>* undef, align 16
%add13802 = fadd <4 x float> %tmp1232, %tmp1231
- store <4 x float> %add13802, <4 x float>* undef, align 16
+ store volatile <4 x float> %add13802, <4 x float>* undef, align 16
%tmp1233 = load <4 x float>, <4 x float>* undef, align 16
%vecext13803 = extractelement <4 x float> %tmp1233, i32 0
%add13804 = fadd float %vecext13803, -2.900000e+01
%tmp1234 = load <4 x float>, <4 x float>* undef, align 16
%vecins13805 = insertelement <4 x float> %tmp1234, float %add13804, i32 0
- store <4 x float> %vecins13805, <4 x float>* undef, align 16
+ store volatile <4 x float> %vecins13805, <4 x float>* undef, align 16
%tmp1235 = load <4 x float>, <4 x float>* undef, align 16
- %add13807 = fadd float undef, 6.400000e+01
+ %add13807 = fadd float %val, 6.400000e+01
%tmp1236 = load <4 x float>, <4 x float>* undef, align 16
%tmp1237 = load <4 x float>, <4 x float>* undef, align 16
%vecext13809 = extractelement <4 x float> %tmp1237, i32 2
@@ -6814,28 +6814,28 @@ entry:
%vecext13812 = extractelement <4 x float> %tmp1238, i32 3
%add13813 = fadd float %vecext13812, -3.615000e+02
%vecins13814 = insertelement <4 x float> undef, float %add13813, i32 3
- store <4 x float> %vecins13814, <4 x float>* undef, align 16
- store <4 x float> <float -2.270000e+02, float -1.500000e+01, float 0x407084CCC0000000, float -1.425000e+02>, <4 x float>* undef
+ store volatile <4 x float> %vecins13814, <4 x float>* undef, align 16
+ store volatile <4 x float> <float -2.270000e+02, float -1.500000e+01, float 0x407084CCC0000000, float -1.425000e+02>, <4 x float>* undef
%tmp1239 = load <4 x float>, <4 x float>* undef
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1240 = load <4 x float>, <4 x float>* undef, align 16
%vecext13817 = extractelement <4 x float> %tmp1240, i32 0
- %vecins13856 = insertelement <4 x float> undef, float undef, i32 3
- store <4 x float> %vecins13856, <4 x float>* undef, align 16
- store <4 x float> <float 0x40656CCCC0000000, float 0xC0656999A0000000, float 0x40778E6660000000, float 0x407ECE6660000000>, <4 x float>* undef
+ %vecins13856 = insertelement <4 x float> undef, float %val, i32 3
+ store volatile <4 x float> %vecins13856, <4 x float>* undef, align 16
+ store volatile <4 x float> <float 0x40656CCCC0000000, float 0xC0656999A0000000, float 0x40778E6660000000, float 0x407ECE6660000000>, <4 x float>* undef
%tmp1241 = load <4 x float>, <4 x float>* undef
%tmp1242 = load <4 x float>, <4 x float>* undef, align 16
- store <4 x float> undef, <4 x float>* undef, align 16
+ store volatile <4 x float> undef, <4 x float>* undef, align 16
%tmp1243 = load <4 x float>, <4 x float>* undef, align 16
%vecext13859 = extractelement <4 x float> %tmp1243, i32 0
%tmp1244 = load <4 x float>, <4 x float>* undef, align 16
- %vecins13861 = insertelement <4 x float> %tmp1244, float undef, i32 0
+ %vecins13861 = insertelement <4 x float> %tmp1244, float %val, i32 0
%tmp1245 = load <4 x float>, <4 x float>* undef, align 16
%vecext13862 = extractelement <4 x float> %tmp1245, i32 1
%add13863 = fadd float %vecext13862, -1.380000e+02
%vecins13864 = insertelement <4 x float> undef, float %add13863, i32 1
- %vecins13867 = insertelement <4 x float> undef, float undef, i32 2
- store <4 x float> %vecins13867, <4 x float>* undef, align 16
+ %vecins13867 = insertelement <4 x float> undef, float %val, i32 2
+ store volatile <4 x float> %vecins13867, <4 x float>* undef, align 16
%tmp1246 = load <4 x float>, <4 x float>* undef, align 16
%tmp1247 = load <4 x float>, <4 x float>* undef, align 16
ret <4 x float> undef
diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll b/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
index c0c691fc5b0a..546ffdd66ff8 100644
--- a/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
@@ -1,13 +1,13 @@
; RUN: llc -march=hexagon -O0 -hexagon-align-loads=0 < %s | FileCheck %s
; CHECK-LABEL: danny:
-; CHECK: r1 = r0
-; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r1+#0)
-; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r1+#2)
-; CHECK: r2 |= asl([[T1]],#16)
-; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r1+#4)
-; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r1+#6)
-; CHECK: r1 |= asl([[T3]],#16)
+; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r0+#0)
+; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r0+#2)
+; CHECK: [[T0]] |= asl([[T1]],#16)
+; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r0+#4)
+; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r0+#6)
+; CHECK: [[T2]] |= asl([[T3]],#16)
+; CHECK: combine([[T2]],[[T0]])
define <4 x i16> @danny(<4 x i16>* %p) {
%t0 = load <4 x i16>, <4 x i16>* %p, align 2
ret <4 x i16> %t0
@@ -15,8 +15,8 @@ define <4 x i16> @danny(<4 x i16>* %p) {
; CHECK-LABEL: sammy:
; CHECK-DAG: [[T0:r[0-9]+]] = memw(r0+#0)
-; CHECK-DAG: r1 = memw(r0+#4)
-; CHECK: r0 = [[T0]]
+; CHECK-DAG: [[T1:r[0-9]+]] = memw(r0+#4)
+; CHECK: combine([[T1]],[[T0]])
define <4 x i16> @sammy(<4 x i16>* %p) {
%t0 = load <4 x i16>, <4 x i16>* %p, align 4
ret <4 x i16> %t0
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll b/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
index f22fbcc7b73e..9025af913ad7 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
@@ -244,12 +244,12 @@ define void @cxiiiiconv() {
; ALL-DAG: lw $[[REG_C1_ADDR:[0-9]+]], %got(c1)($[[REG_GP]])
; ALL-DAG: lbu $[[REG_C1:[0-9]+]], 0($[[REG_C1_ADDR]])
; 32R1-DAG: sll $[[REG_C1_1:[0-9]+]], $[[REG_C1]], 24
- ; 32R1-DAG: sra $4, $[[REG_C1_1]], 24
- ; 32R2-DAG: seb $4, $[[REG_C1]]
+ ; 32R1-DAG: sra $5, $[[REG_C1_1]], 24
+ ; 32R2-DAG: seb $5, $[[REG_C1]]
 ; FIXME: andi is superfluous
; ALL-DAG: lw $[[REG_UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
; ALL-DAG: lbu $[[REG_UC1:[0-9]+]], 0($[[REG_UC1_ADDR]])
- ; ALL-DAG: andi $5, $[[REG_UC1]], 255
+ ; ALL-DAG: andi $4, $[[REG_UC1]], 255
; ALL-DAG: lw $[[REG_S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
; ALL-DAG: lhu $[[REG_S1:[0-9]+]], 0($[[REG_S1_ADDR]])
; 32R1-DAG: sll $[[REG_S1_1:[0-9]+]], $[[REG_S1]], 16
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll b/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
index 1a2ad44b0a6b..3e30f75d6f4a 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
@@ -17,9 +17,15 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
define void @cpy(i8* %src, i32 %i) {
; ALL-LABEL: cpy:
- ; ALL: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
- ; ALL: lw $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}})
- ; ALL: jalr $[[T2]]
+ ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
+ ; ALL-DAG: sw $4, 24($sp)
+ ; ALL-DAG: move $4, $[[T0]]
+ ; ALL-DAG: sw $5, 20($sp)
+ ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp)
+ ; ALL-DAG: move $5, $[[T1]]
+ ; ALL-DAG: lw $6, 20($sp)
+ ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}})
+ ; ALL: jalr $[[T2]]
; ALL-NEXT: nop
; ALL-NOT: {{.*}}$2{{.*}}
call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), i8* %src, i32 %i, i1 false)
@@ -30,8 +36,14 @@ define void @mov(i8* %src, i32 %i) {
; ALL-LABEL: mov:
- ; ALL: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
- ; ALL: lw $[[T2:[0-9]+]], %got(memmove)(${{[0-9]+}})
+ ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
+ ; ALL-DAG: sw $4, 24($sp)
+ ; ALL-DAG: move $4, $[[T0]]
+ ; ALL-DAG: sw $5, 20($sp)
+ ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp)
+ ; ALL-DAG: move $5, $[[T1]]
+ ; ALL-DAG: lw $6, 20($sp)
+ ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memmove)(${{[0-9]+}})
; ALL: jalr $[[T2]]
; ALL-NEXT: nop
; ALL-NOT: {{.*}}$2{{.*}}
@@ -42,8 +54,15 @@ define void @mov(i8* %src, i32 %i) {
define void @clear(i32 %i) {
; ALL-LABEL: clear:
- ; ALL: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
- ; ALL: lw $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}})
+ ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
+ ; ALL-DAG: sw $4, 16($sp)
+ ; ALL-DAG: move $4, $[[T0]]
+ ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 42
+ ; 32R1-DAG: sll $[[T2:[0-9]+]], $[[T1]], 24
+ ; 32R1-DAG: sra $5, $[[T2]], 24
+ ; 32R2-DAG: seb $5, $[[T1]]
+ ; ALL-DAG: lw $6, 16($sp)
+ ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}})
; ALL: jalr $[[T2]]
; ALL-NEXT: nop
; ALL-NOT: {{.*}}$2{{.*}}
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll b/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll
index e482a13f3d5c..6befe70270df 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll
@@ -5,10 +5,9 @@ define void @test(i32 %x, i1* %p) nounwind {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
; CHECK-NEXT: move $1, $4
-; CHECK-NEXT: move $4, $1
+; CHECK-NEXT: andi $2, $4, 1
+; CHECK-NEXT: sb $2, 0($5)
; CHECK-NEXT: andi $1, $1, 1
-; CHECK-NEXT: sb $1, 0($5)
-; CHECK-NEXT: andi $1, $4, 1
; CHECK-NEXT: bgtz $1, $BB0_1
; CHECK-NEXT: nop
; CHECK-NEXT: # %bb.1: # %foo
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
index fe26837d0a34..0b217b837479 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
@@ -86,11 +86,12 @@ entry:
define i64 @add_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: add_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addu $2, $6, $4
-; MIPS32-NEXT: sltu $3, $2, $4
-; MIPS32-NEXT: addu $1, $7, $5
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: addu $3, $1, $3
+; MIPS32-NEXT: addu $1, $6, $4
+; MIPS32-NEXT: sltu $2, $1, $4
+; MIPS32-NEXT: addu $3, $7, $5
+; MIPS32-NEXT: andi $2, $2, 1
+; MIPS32-NEXT: addu $3, $3, $2
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -101,30 +102,34 @@ entry:
define i128 @add_i128(i128 %a, i128 %b) {
; MIPS32-LABEL: add_i128:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $8, $4
-; MIPS32-NEXT: move $3, $5
-; MIPS32-NEXT: move $4, $6
-; MIPS32-NEXT: addiu $1, $sp, 16
-; MIPS32-NEXT: lw $2, 0($1)
-; MIPS32-NEXT: addiu $1, $sp, 20
-; MIPS32-NEXT: lw $6, 0($1)
+; MIPS32-NEXT: addiu $sp, $sp, -8
+; MIPS32-NEXT: .cfi_def_cfa_offset 8
; MIPS32-NEXT: addiu $1, $sp, 24
-; MIPS32-NEXT: lw $5, 0($1)
-; MIPS32-NEXT: addiu $1, $sp, 28
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: addu $2, $2, $8
-; MIPS32-NEXT: sltu $8, $2, $8
-; MIPS32-NEXT: addu $3, $6, $3
-; MIPS32-NEXT: andi $8, $8, 1
-; MIPS32-NEXT: addu $3, $3, $8
-; MIPS32-NEXT: sltu $6, $3, $6
+; MIPS32-NEXT: addiu $2, $sp, 28
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 32
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: addiu $8, $sp, 36
+; MIPS32-NEXT: lw $8, 0($8)
+; MIPS32-NEXT: addu $1, $1, $4
+; MIPS32-NEXT: sltu $4, $1, $4
+; MIPS32-NEXT: addu $5, $2, $5
+; MIPS32-NEXT: andi $4, $4, 1
; MIPS32-NEXT: addu $4, $5, $4
-; MIPS32-NEXT: andi $6, $6, 1
-; MIPS32-NEXT: addu $4, $4, $6
-; MIPS32-NEXT: sltu $5, $4, $5
-; MIPS32-NEXT: addu $1, $1, $7
-; MIPS32-NEXT: andi $5, $5, 1
-; MIPS32-NEXT: addu $5, $1, $5
+; MIPS32-NEXT: sltu $2, $4, $2
+; MIPS32-NEXT: addu $5, $3, $6
+; MIPS32-NEXT: andi $2, $2, 1
+; MIPS32-NEXT: addu $2, $5, $2
+; MIPS32-NEXT: sltu $3, $2, $3
+; MIPS32-NEXT: addu $5, $8, $7
+; MIPS32-NEXT: andi $3, $3, 1
+; MIPS32-NEXT: addu $5, $5, $3
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $2, $1
+; MIPS32-NEXT: move $3, $4
+; MIPS32-NEXT: lw $4, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: addiu $sp, $sp, 8
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll
index 74ecbf6ed7a8..5d8585173b99 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll
@@ -4,9 +4,9 @@
define void @add_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %c) {
; P5600-LABEL: add_v16i8:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.b $w1, 0($4)
-; P5600-NEXT: ld.b $w0, 0($5)
-; P5600-NEXT: addv.b $w0, $w0, $w1
+; P5600-NEXT: ld.b $w0, 0($4)
+; P5600-NEXT: ld.b $w1, 0($5)
+; P5600-NEXT: addv.b $w0, $w1, $w0
; P5600-NEXT: st.b $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
@@ -21,9 +21,9 @@ entry:
define void @add_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c) {
; P5600-LABEL: add_v8i16:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.h $w1, 0($4)
-; P5600-NEXT: ld.h $w0, 0($5)
-; P5600-NEXT: addv.h $w0, $w0, $w1
+; P5600-NEXT: ld.h $w0, 0($4)
+; P5600-NEXT: ld.h $w1, 0($5)
+; P5600-NEXT: addv.h $w0, $w1, $w0
; P5600-NEXT: st.h $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
@@ -38,9 +38,9 @@ entry:
define void @add_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c) {
; P5600-LABEL: add_v4i32:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.w $w1, 0($4)
-; P5600-NEXT: ld.w $w0, 0($5)
-; P5600-NEXT: addv.w $w0, $w0, $w1
+; P5600-NEXT: ld.w $w0, 0($4)
+; P5600-NEXT: ld.w $w1, 0($5)
+; P5600-NEXT: addv.w $w0, $w1, $w0
; P5600-NEXT: st.w $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
@@ -55,9 +55,9 @@ entry:
define void @add_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i64>* %c) {
; P5600-LABEL: add_v2i64:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.d $w1, 0($4)
-; P5600-NEXT: ld.d $w0, 0($5)
-; P5600-NEXT: addv.d $w0, $w0, $w1
+; P5600-NEXT: ld.d $w0, 0($4)
+; P5600-NEXT: ld.d $w1, 0($5)
+; P5600-NEXT: addv.d $w0, $w1, $w0
; P5600-NEXT: st.d $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll
index 32bc78827baf..a9f49c025b95 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll
@@ -6,10 +6,10 @@ define { float, float } @add_complex_float({ float, float }* %a, { float, float
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lwc1 $f0, 0($4)
; MIPS32-NEXT: lwc1 $f1, 4($4)
-; MIPS32-NEXT: lwc1 $f3, 0($5)
-; MIPS32-NEXT: lwc1 $f2, 4($5)
-; MIPS32-NEXT: add.s $f0, $f0, $f3
-; MIPS32-NEXT: add.s $f2, $f1, $f2
+; MIPS32-NEXT: lwc1 $f2, 0($5)
+; MIPS32-NEXT: lwc1 $f3, 4($5)
+; MIPS32-NEXT: add.s $f0, $f0, $f2
+; MIPS32-NEXT: add.s $f2, $f1, $f3
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -33,10 +33,10 @@ define { double, double } @add_complex_double({ double, double }* %a, { double,
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: ldc1 $f0, 0($4)
; MIPS32-NEXT: ldc1 $f2, 8($4)
-; MIPS32-NEXT: ldc1 $f6, 0($5)
-; MIPS32-NEXT: ldc1 $f4, 8($5)
-; MIPS32-NEXT: add.d $f0, $f0, $f6
-; MIPS32-NEXT: add.d $f2, $f2, $f4
+; MIPS32-NEXT: ldc1 $f4, 0($5)
+; MIPS32-NEXT: ldc1 $f6, 8($5)
+; MIPS32-NEXT: add.d $f0, $f0, $f4
+; MIPS32-NEXT: add.d $f2, $f2, $f6
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -66,9 +66,9 @@ define void @call_ret_complex_float({ float, float }* %z) {
; MIPS32-NEXT: sw $4, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: jal ret_complex_float
; MIPS32-NEXT: nop
-; MIPS32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: swc1 $f0, 0($4)
-; MIPS32-NEXT: swc1 $f2, 4($4)
+; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: swc1 $f0, 0($1)
+; MIPS32-NEXT: swc1 $f2, 4($1)
; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 24
; MIPS32-NEXT: jr $ra
@@ -95,9 +95,9 @@ define void @call_ret_complex_double({ double, double }* %z) {
; MIPS32-NEXT: sw $4, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: jal ret_complex_double
; MIPS32-NEXT: nop
-; MIPS32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sdc1 $f0, 0($4)
-; MIPS32-NEXT: sdc1 $f2, 8($4)
+; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sdc1 $f0, 0($1)
+; MIPS32-NEXT: sdc1 $f2, 8($1)
; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 24
; MIPS32-NEXT: jr $ra
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll
index 2bbbe4ac3ae2..662bcdf757b6 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll
@@ -6,64 +6,64 @@ declare i32 @llvm.bitreverse.i32(i32)
define i32 @bitreverse_i32(i32 signext %a) {
; MIPS32-LABEL: bitreverse_i32:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: sll $2, $4, 24
-; MIPS32-NEXT: srl $1, $4, 24
-; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: sll $1, $4, 24
+; MIPS32-NEXT: srl $2, $4, 24
+; MIPS32-NEXT: or $1, $2, $1
; MIPS32-NEXT: andi $2, $4, 65280
; MIPS32-NEXT: sll $2, $2, 8
; MIPS32-NEXT: or $1, $1, $2
; MIPS32-NEXT: srl $2, $4, 8
; MIPS32-NEXT: andi $2, $2, 65280
-; MIPS32-NEXT: or $2, $1, $2
-; MIPS32-NEXT: lui $1, 61680
-; MIPS32-NEXT: ori $3, $1, 61680
-; MIPS32-NEXT: and $1, $2, $3
-; MIPS32-NEXT: srl $1, $1, 4
-; MIPS32-NEXT: sll $2, $2, 4
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: or $2, $1, $2
-; MIPS32-NEXT: lui $1, 52428
-; MIPS32-NEXT: ori $3, $1, 52428
-; MIPS32-NEXT: and $1, $2, $3
-; MIPS32-NEXT: srl $1, $1, 2
-; MIPS32-NEXT: sll $2, $2, 2
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: or $2, $1, $2
-; MIPS32-NEXT: lui $1, 43690
-; MIPS32-NEXT: ori $3, $1, 43690
-; MIPS32-NEXT: and $1, $2, $3
-; MIPS32-NEXT: srl $1, $1, 1
-; MIPS32-NEXT: sll $2, $2, 1
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: or $2, $1, $2
+; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: lui $2, 61680
+; MIPS32-NEXT: ori $2, $2, 61680
+; MIPS32-NEXT: and $3, $1, $2
+; MIPS32-NEXT: srl $3, $3, 4
+; MIPS32-NEXT: sll $1, $1, 4
+; MIPS32-NEXT: and $1, $1, $2
+; MIPS32-NEXT: or $1, $3, $1
+; MIPS32-NEXT: lui $2, 52428
+; MIPS32-NEXT: ori $2, $2, 52428
+; MIPS32-NEXT: and $3, $1, $2
+; MIPS32-NEXT: srl $3, $3, 2
+; MIPS32-NEXT: sll $1, $1, 2
+; MIPS32-NEXT: and $1, $1, $2
+; MIPS32-NEXT: or $1, $3, $1
+; MIPS32-NEXT: lui $2, 43690
+; MIPS32-NEXT: ori $2, $2, 43690
+; MIPS32-NEXT: and $3, $1, $2
+; MIPS32-NEXT: srl $3, $3, 1
+; MIPS32-NEXT: sll $1, $1, 1
+; MIPS32-NEXT: and $1, $1, $2
+; MIPS32-NEXT: or $2, $3, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R2-LABEL: bitreverse_i32:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: wsbh $1, $4
-; MIPS32R2-NEXT: rotr $2, $1, 16
-; MIPS32R2-NEXT: lui $1, 61680
-; MIPS32R2-NEXT: ori $3, $1, 61680
-; MIPS32R2-NEXT: and $1, $2, $3
-; MIPS32R2-NEXT: srl $1, $1, 4
-; MIPS32R2-NEXT: sll $2, $2, 4
-; MIPS32R2-NEXT: and $2, $2, $3
-; MIPS32R2-NEXT: or $2, $1, $2
-; MIPS32R2-NEXT: lui $1, 52428
-; MIPS32R2-NEXT: ori $3, $1, 52428
-; MIPS32R2-NEXT: and $1, $2, $3
-; MIPS32R2-NEXT: srl $1, $1, 2
-; MIPS32R2-NEXT: sll $2, $2, 2
-; MIPS32R2-NEXT: and $2, $2, $3
-; MIPS32R2-NEXT: or $2, $1, $2
-; MIPS32R2-NEXT: lui $1, 43690
-; MIPS32R2-NEXT: ori $3, $1, 43690
-; MIPS32R2-NEXT: and $1, $2, $3
-; MIPS32R2-NEXT: srl $1, $1, 1
-; MIPS32R2-NEXT: sll $2, $2, 1
-; MIPS32R2-NEXT: and $2, $2, $3
-; MIPS32R2-NEXT: or $2, $1, $2
+; MIPS32R2-NEXT: rotr $1, $1, 16
+; MIPS32R2-NEXT: lui $2, 61680
+; MIPS32R2-NEXT: ori $2, $2, 61680
+; MIPS32R2-NEXT: and $3, $1, $2
+; MIPS32R2-NEXT: srl $3, $3, 4
+; MIPS32R2-NEXT: sll $1, $1, 4
+; MIPS32R2-NEXT: and $1, $1, $2
+; MIPS32R2-NEXT: or $1, $3, $1
+; MIPS32R2-NEXT: lui $2, 52428
+; MIPS32R2-NEXT: ori $2, $2, 52428
+; MIPS32R2-NEXT: and $3, $1, $2
+; MIPS32R2-NEXT: srl $3, $3, 2
+; MIPS32R2-NEXT: sll $1, $1, 2
+; MIPS32R2-NEXT: and $1, $1, $2
+; MIPS32R2-NEXT: or $1, $3, $1
+; MIPS32R2-NEXT: lui $2, 43690
+; MIPS32R2-NEXT: ori $2, $2, 43690
+; MIPS32R2-NEXT: and $3, $1, $2
+; MIPS32R2-NEXT: srl $3, $3, 1
+; MIPS32R2-NEXT: sll $1, $1, 1
+; MIPS32R2-NEXT: and $1, $1, $2
+; MIPS32R2-NEXT: or $2, $3, $1
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
entry:
@@ -75,107 +75,107 @@ declare i64 @llvm.bitreverse.i64(i64)
define i64 @bitreverse_i64(i64 signext %a) {
; MIPS32-LABEL: bitreverse_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $3, $4
-; MIPS32-NEXT: sll $2, $5, 24
-; MIPS32-NEXT: srl $1, $5, 24
-; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: sll $1, $5, 24
+; MIPS32-NEXT: srl $2, $5, 24
+; MIPS32-NEXT: or $1, $2, $1
; MIPS32-NEXT: andi $2, $5, 65280
; MIPS32-NEXT: sll $2, $2, 8
; MIPS32-NEXT: or $1, $1, $2
; MIPS32-NEXT: srl $2, $5, 8
; MIPS32-NEXT: andi $2, $2, 65280
-; MIPS32-NEXT: or $2, $1, $2
-; MIPS32-NEXT: lui $1, 61680
-; MIPS32-NEXT: ori $6, $1, 61680
-; MIPS32-NEXT: and $1, $2, $6
-; MIPS32-NEXT: srl $1, $1, 4
-; MIPS32-NEXT: sll $2, $2, 4
-; MIPS32-NEXT: and $2, $2, $6
-; MIPS32-NEXT: or $2, $1, $2
-; MIPS32-NEXT: lui $1, 52428
-; MIPS32-NEXT: ori $5, $1, 52428
-; MIPS32-NEXT: and $1, $2, $5
-; MIPS32-NEXT: srl $1, $1, 2
+; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: lui $2, 61680
+; MIPS32-NEXT: ori $2, $2, 61680
+; MIPS32-NEXT: and $3, $1, $2
+; MIPS32-NEXT: srl $3, $3, 4
+; MIPS32-NEXT: sll $1, $1, 4
+; MIPS32-NEXT: and $1, $1, $2
+; MIPS32-NEXT: or $1, $3, $1
+; MIPS32-NEXT: lui $3, 52428
+; MIPS32-NEXT: ori $3, $3, 52428
+; MIPS32-NEXT: and $5, $1, $3
+; MIPS32-NEXT: srl $5, $5, 2
+; MIPS32-NEXT: sll $1, $1, 2
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: or $1, $5, $1
+; MIPS32-NEXT: lui $5, 43690
+; MIPS32-NEXT: ori $5, $5, 43690
+; MIPS32-NEXT: and $6, $1, $5
+; MIPS32-NEXT: srl $6, $6, 1
+; MIPS32-NEXT: sll $1, $1, 1
+; MIPS32-NEXT: and $1, $1, $5
+; MIPS32-NEXT: or $1, $6, $1
+; MIPS32-NEXT: sll $6, $4, 24
+; MIPS32-NEXT: srl $7, $4, 24
+; MIPS32-NEXT: or $6, $7, $6
+; MIPS32-NEXT: andi $7, $4, 65280
+; MIPS32-NEXT: sll $7, $7, 8
+; MIPS32-NEXT: or $6, $6, $7
+; MIPS32-NEXT: srl $4, $4, 8
+; MIPS32-NEXT: andi $4, $4, 65280
+; MIPS32-NEXT: or $4, $6, $4
+; MIPS32-NEXT: and $6, $4, $2
+; MIPS32-NEXT: srl $6, $6, 4
+; MIPS32-NEXT: sll $4, $4, 4
+; MIPS32-NEXT: and $2, $4, $2
+; MIPS32-NEXT: or $2, $6, $2
+; MIPS32-NEXT: and $4, $2, $3
+; MIPS32-NEXT: srl $4, $4, 2
; MIPS32-NEXT: sll $2, $2, 2
-; MIPS32-NEXT: and $2, $2, $5
-; MIPS32-NEXT: or $2, $1, $2
-; MIPS32-NEXT: lui $1, 43690
-; MIPS32-NEXT: ori $4, $1, 43690
-; MIPS32-NEXT: and $1, $2, $4
-; MIPS32-NEXT: srl $1, $1, 1
+; MIPS32-NEXT: and $2, $2, $3
+; MIPS32-NEXT: or $2, $4, $2
+; MIPS32-NEXT: and $3, $2, $5
+; MIPS32-NEXT: srl $3, $3, 1
; MIPS32-NEXT: sll $2, $2, 1
-; MIPS32-NEXT: and $2, $2, $4
-; MIPS32-NEXT: or $2, $1, $2
-; MIPS32-NEXT: sll $7, $3, 24
-; MIPS32-NEXT: srl $1, $3, 24
-; MIPS32-NEXT: or $1, $1, $7
-; MIPS32-NEXT: andi $7, $3, 65280
-; MIPS32-NEXT: sll $7, $7, 8
-; MIPS32-NEXT: or $1, $1, $7
-; MIPS32-NEXT: srl $3, $3, 8
-; MIPS32-NEXT: andi $3, $3, 65280
-; MIPS32-NEXT: or $3, $1, $3
-; MIPS32-NEXT: and $1, $3, $6
-; MIPS32-NEXT: srl $1, $1, 4
-; MIPS32-NEXT: sll $3, $3, 4
-; MIPS32-NEXT: and $3, $3, $6
-; MIPS32-NEXT: or $3, $1, $3
-; MIPS32-NEXT: and $1, $3, $5
-; MIPS32-NEXT: srl $1, $1, 2
-; MIPS32-NEXT: sll $3, $3, 2
-; MIPS32-NEXT: and $3, $3, $5
-; MIPS32-NEXT: or $3, $1, $3
-; MIPS32-NEXT: and $1, $3, $4
-; MIPS32-NEXT: srl $1, $1, 1
-; MIPS32-NEXT: sll $3, $3, 1
-; MIPS32-NEXT: and $3, $3, $4
-; MIPS32-NEXT: or $3, $1, $3
+; MIPS32-NEXT: and $2, $2, $5
+; MIPS32-NEXT: or $3, $3, $2
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R2-LABEL: bitreverse_i64:
; MIPS32R2: # %bb.0: # %entry
-; MIPS32R2-NEXT: move $1, $4
-; MIPS32R2-NEXT: wsbh $2, $5
-; MIPS32R2-NEXT: rotr $3, $2, 16
+; MIPS32R2-NEXT: wsbh $1, $5
+; MIPS32R2-NEXT: rotr $1, $1, 16
; MIPS32R2-NEXT: lui $2, 61680
-; MIPS32R2-NEXT: ori $6, $2, 61680
-; MIPS32R2-NEXT: and $2, $3, $6
-; MIPS32R2-NEXT: srl $2, $2, 4
-; MIPS32R2-NEXT: sll $3, $3, 4
-; MIPS32R2-NEXT: and $3, $3, $6
-; MIPS32R2-NEXT: or $3, $2, $3
-; MIPS32R2-NEXT: lui $2, 52428
-; MIPS32R2-NEXT: ori $5, $2, 52428
-; MIPS32R2-NEXT: and $2, $3, $5
-; MIPS32R2-NEXT: srl $2, $2, 2
-; MIPS32R2-NEXT: sll $3, $3, 2
-; MIPS32R2-NEXT: and $3, $3, $5
-; MIPS32R2-NEXT: or $3, $2, $3
-; MIPS32R2-NEXT: lui $2, 43690
-; MIPS32R2-NEXT: ori $4, $2, 43690
-; MIPS32R2-NEXT: and $2, $3, $4
-; MIPS32R2-NEXT: srl $2, $2, 1
-; MIPS32R2-NEXT: sll $3, $3, 1
-; MIPS32R2-NEXT: and $3, $3, $4
-; MIPS32R2-NEXT: or $2, $2, $3
-; MIPS32R2-NEXT: wsbh $1, $1
-; MIPS32R2-NEXT: rotr $3, $1, 16
-; MIPS32R2-NEXT: and $1, $3, $6
-; MIPS32R2-NEXT: srl $1, $1, 4
-; MIPS32R2-NEXT: sll $3, $3, 4
-; MIPS32R2-NEXT: and $3, $3, $6
-; MIPS32R2-NEXT: or $3, $1, $3
-; MIPS32R2-NEXT: and $1, $3, $5
-; MIPS32R2-NEXT: srl $1, $1, 2
-; MIPS32R2-NEXT: sll $3, $3, 2
-; MIPS32R2-NEXT: and $3, $3, $5
-; MIPS32R2-NEXT: or $3, $1, $3
-; MIPS32R2-NEXT: and $1, $3, $4
-; MIPS32R2-NEXT: srl $1, $1, 1
-; MIPS32R2-NEXT: sll $3, $3, 1
-; MIPS32R2-NEXT: and $3, $3, $4
-; MIPS32R2-NEXT: or $3, $1, $3
+; MIPS32R2-NEXT: ori $2, $2, 61680
+; MIPS32R2-NEXT: and $3, $1, $2
+; MIPS32R2-NEXT: srl $3, $3, 4
+; MIPS32R2-NEXT: sll $1, $1, 4
+; MIPS32R2-NEXT: and $1, $1, $2
+; MIPS32R2-NEXT: or $1, $3, $1
+; MIPS32R2-NEXT: lui $3, 52428
+; MIPS32R2-NEXT: ori $3, $3, 52428
+; MIPS32R2-NEXT: and $5, $1, $3
+; MIPS32R2-NEXT: srl $5, $5, 2
+; MIPS32R2-NEXT: sll $1, $1, 2
+; MIPS32R2-NEXT: and $1, $1, $3
+; MIPS32R2-NEXT: or $1, $5, $1
+; MIPS32R2-NEXT: lui $5, 43690
+; MIPS32R2-NEXT: ori $5, $5, 43690
+; MIPS32R2-NEXT: and $6, $1, $5
+; MIPS32R2-NEXT: srl $6, $6, 1
+; MIPS32R2-NEXT: sll $1, $1, 1
+; MIPS32R2-NEXT: and $1, $1, $5
+; MIPS32R2-NEXT: or $1, $6, $1
+; MIPS32R2-NEXT: wsbh $4, $4
+; MIPS32R2-NEXT: rotr $4, $4, 16
+; MIPS32R2-NEXT: and $6, $4, $2
+; MIPS32R2-NEXT: srl $6, $6, 4
+; MIPS32R2-NEXT: sll $4, $4, 4
+; MIPS32R2-NEXT: and $2, $4, $2
+; MIPS32R2-NEXT: or $2, $6, $2
+; MIPS32R2-NEXT: and $4, $2, $3
+; MIPS32R2-NEXT: srl $4, $4, 2
+; MIPS32R2-NEXT: sll $2, $2, 2
+; MIPS32R2-NEXT: and $2, $2, $3
+; MIPS32R2-NEXT: or $2, $4, $2
+; MIPS32R2-NEXT: and $3, $2, $5
+; MIPS32R2-NEXT: srl $3, $3, 1
+; MIPS32R2-NEXT: sll $2, $2, 1
+; MIPS32R2-NEXT: and $2, $2, $5
+; MIPS32R2-NEXT: or $3, $3, $2
+; MIPS32R2-NEXT: move $2, $1
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
index 803b76cbc51a..4022efcafb64 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
@@ -320,10 +320,10 @@ define i8 @ashr_i8(i8 %a) {
; MIPS32-LABEL: ashr_i8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: ori $1, $zero, 2
-; MIPS32-NEXT: andi $2, $1, 255
-; MIPS32-NEXT: sll $1, $4, 24
-; MIPS32-NEXT: sra $1, $1, 24
-; MIPS32-NEXT: srav $2, $1, $2
+; MIPS32-NEXT: andi $1, $1, 255
+; MIPS32-NEXT: sll $2, $4, 24
+; MIPS32-NEXT: sra $2, $2, 24
+; MIPS32-NEXT: srav $2, $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -335,9 +335,9 @@ define i16 @lshr_i16(i16 %a) {
; MIPS32-LABEL: lshr_i16:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: ori $1, $zero, 2
-; MIPS32-NEXT: andi $2, $1, 65535
-; MIPS32-NEXT: andi $1, $4, 65535
-; MIPS32-NEXT: srlv $2, $1, $2
+; MIPS32-NEXT: andi $1, $1, 65535
+; MIPS32-NEXT: andi $2, $4, 65535
+; MIPS32-NEXT: srlv $2, $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -348,25 +348,29 @@ entry:
define i64 @shl_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: shl_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $3, $4
-; MIPS32-NEXT: move $9, $6
+; MIPS32-NEXT: addiu $sp, $sp, -8
+; MIPS32-NEXT: .cfi_def_cfa_offset 8
; MIPS32-NEXT: ori $1, $zero, 32
-; MIPS32-NEXT: subu $8, $9, $1
-; MIPS32-NEXT: subu $4, $1, $9
-; MIPS32-NEXT: ori $2, $zero, 0
-; MIPS32-NEXT: sltu $6, $9, $1
-; MIPS32-NEXT: sltiu $1, $9, 1
-; MIPS32-NEXT: sllv $7, $3, $9
-; MIPS32-NEXT: srlv $4, $3, $4
-; MIPS32-NEXT: sllv $9, $5, $9
-; MIPS32-NEXT: or $4, $4, $9
-; MIPS32-NEXT: sllv $3, $3, $8
-; MIPS32-NEXT: andi $8, $6, 1
-; MIPS32-NEXT: movn $2, $7, $8
-; MIPS32-NEXT: andi $6, $6, 1
-; MIPS32-NEXT: movn $3, $4, $6
+; MIPS32-NEXT: subu $2, $6, $1
+; MIPS32-NEXT: subu $3, $1, $6
+; MIPS32-NEXT: ori $8, $zero, 0
+; MIPS32-NEXT: sltu $1, $6, $1
+; MIPS32-NEXT: sltiu $9, $6, 1
+; MIPS32-NEXT: sllv $10, $4, $6
+; MIPS32-NEXT: srlv $3, $4, $3
+; MIPS32-NEXT: sllv $6, $5, $6
+; MIPS32-NEXT: or $3, $3, $6
+; MIPS32-NEXT: sllv $2, $4, $2
+; MIPS32-NEXT: andi $4, $1, 1
+; MIPS32-NEXT: movn $8, $10, $4
; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: movn $3, $5, $1
+; MIPS32-NEXT: movn $2, $3, $1
+; MIPS32-NEXT: andi $1, $9, 1
+; MIPS32-NEXT: movn $2, $5, $1
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $2, $8
+; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: addiu $sp, $sp, 8
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -377,30 +381,24 @@ entry:
define i64 @ashl_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: ashl_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $sp, $sp, -8
-; MIPS32-NEXT: .cfi_def_cfa_offset 8
-; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: move $2, $5
-; MIPS32-NEXT: lw $5, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: move $3, $6
; MIPS32-NEXT: ori $1, $zero, 32
-; MIPS32-NEXT: subu $8, $3, $1
-; MIPS32-NEXT: subu $7, $1, $3
-; MIPS32-NEXT: sltu $4, $3, $1
-; MIPS32-NEXT: sltiu $6, $3, 1
-; MIPS32-NEXT: srav $1, $2, $3
-; MIPS32-NEXT: srlv $3, $5, $3
-; MIPS32-NEXT: sllv $7, $2, $7
-; MIPS32-NEXT: or $7, $3, $7
-; MIPS32-NEXT: sra $3, $2, 31
-; MIPS32-NEXT: srav $2, $2, $8
-; MIPS32-NEXT: andi $8, $4, 1
-; MIPS32-NEXT: movn $2, $7, $8
-; MIPS32-NEXT: andi $6, $6, 1
-; MIPS32-NEXT: movn $2, $5, $6
-; MIPS32-NEXT: andi $4, $4, 1
-; MIPS32-NEXT: movn $3, $1, $4
-; MIPS32-NEXT: addiu $sp, $sp, 8
+; MIPS32-NEXT: subu $2, $6, $1
+; MIPS32-NEXT: subu $3, $1, $6
+; MIPS32-NEXT: sltu $1, $6, $1
+; MIPS32-NEXT: sltiu $8, $6, 1
+; MIPS32-NEXT: srav $9, $5, $6
+; MIPS32-NEXT: srlv $6, $4, $6
+; MIPS32-NEXT: sllv $3, $5, $3
+; MIPS32-NEXT: or $3, $6, $3
+; MIPS32-NEXT: sra $6, $5, 31
+; MIPS32-NEXT: srav $2, $5, $2
+; MIPS32-NEXT: andi $5, $1, 1
+; MIPS32-NEXT: movn $2, $3, $5
+; MIPS32-NEXT: andi $3, $8, 1
+; MIPS32-NEXT: movn $2, $4, $3
+; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: movn $6, $9, $1
+; MIPS32-NEXT: move $3, $6
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -411,30 +409,24 @@ entry:
define i64 @lshr_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: lshr_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $sp, $sp, -8
-; MIPS32-NEXT: .cfi_def_cfa_offset 8
-; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: move $2, $5
-; MIPS32-NEXT: lw $5, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: move $7, $6
; MIPS32-NEXT: ori $1, $zero, 32
-; MIPS32-NEXT: subu $8, $7, $1
-; MIPS32-NEXT: subu $9, $1, $7
-; MIPS32-NEXT: ori $3, $zero, 0
-; MIPS32-NEXT: sltu $4, $7, $1
-; MIPS32-NEXT: sltiu $6, $7, 1
-; MIPS32-NEXT: srlv $1, $2, $7
-; MIPS32-NEXT: srlv $7, $5, $7
-; MIPS32-NEXT: sllv $9, $2, $9
-; MIPS32-NEXT: or $7, $7, $9
-; MIPS32-NEXT: srlv $2, $2, $8
-; MIPS32-NEXT: andi $8, $4, 1
-; MIPS32-NEXT: movn $2, $7, $8
-; MIPS32-NEXT: andi $6, $6, 1
-; MIPS32-NEXT: movn $2, $5, $6
-; MIPS32-NEXT: andi $4, $4, 1
-; MIPS32-NEXT: movn $3, $1, $4
-; MIPS32-NEXT: addiu $sp, $sp, 8
+; MIPS32-NEXT: subu $2, $6, $1
+; MIPS32-NEXT: subu $3, $1, $6
+; MIPS32-NEXT: ori $8, $zero, 0
+; MIPS32-NEXT: sltu $1, $6, $1
+; MIPS32-NEXT: sltiu $9, $6, 1
+; MIPS32-NEXT: srlv $10, $5, $6
+; MIPS32-NEXT: srlv $6, $4, $6
+; MIPS32-NEXT: sllv $3, $5, $3
+; MIPS32-NEXT: or $3, $6, $3
+; MIPS32-NEXT: srlv $2, $5, $2
+; MIPS32-NEXT: andi $5, $1, 1
+; MIPS32-NEXT: movn $2, $3, $5
+; MIPS32-NEXT: andi $3, $9, 1
+; MIPS32-NEXT: movn $2, $4, $3
+; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: movn $8, $10, $1
+; MIPS32-NEXT: move $3, $8
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll
index 6ce601a94ef7..4600142cb9be 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll
@@ -30,21 +30,21 @@ define i32 @Conditional_branch(i1 %cond, i32 %a, i32 %b) {
; MIPS32: # %bb.0:
; MIPS32-NEXT: addiu $sp, $sp, -8
; MIPS32-NEXT: .cfi_def_cfa_offset 8
-; MIPS32-NEXT: sw $5, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: andi $1, $4, 1
+; MIPS32-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $1, $BB1_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1:
; MIPS32-NEXT: j $BB1_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_2: # %if.then
-; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 8
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_3: # %if.else
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 8
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll
index 9bb803f4cfd3..568558538cdb 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll
@@ -6,19 +6,19 @@ define i32 @indirectbr(i8 *%addr) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -8
; MIPS32-NEXT: .cfi_def_cfa_offset 8
-; MIPS32-NEXT: ori $1, $zero, 1
-; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: ori $2, $zero, 1
; MIPS32-NEXT: ori $1, $zero, 0
-; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: jr $4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_1: # %L1
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 8
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_2: # %L2
-; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 8
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll
index b168e13b7f55..3b371cca6fe9 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll
@@ -6,9 +6,9 @@ declare i32 @llvm.bswap.i32(i32)
define i32 @bswap_i32(i32 %x) {
; MIPS32-LABEL: bswap_i32:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: sll $2, $4, 24
-; MIPS32-NEXT: srl $1, $4, 24
-; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: sll $1, $4, 24
+; MIPS32-NEXT: srl $2, $4, 24
+; MIPS32-NEXT: or $1, $2, $1
; MIPS32-NEXT: andi $2, $4, 65280
; MIPS32-NEXT: sll $2, $2, 8
; MIPS32-NEXT: or $1, $1, $2
@@ -33,18 +33,18 @@ declare i64 @llvm.bswap.i64(i64)
define i64 @bswap_i64(i64 %x) {
; MIPS32-LABEL: bswap_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: sll $2, $5, 24
-; MIPS32-NEXT: srl $1, $5, 24
-; MIPS32-NEXT: or $1, $1, $2
+; MIPS32-NEXT: sll $1, $5, 24
+; MIPS32-NEXT: srl $2, $5, 24
+; MIPS32-NEXT: or $1, $2, $1
; MIPS32-NEXT: andi $2, $5, 65280
; MIPS32-NEXT: sll $2, $2, 8
; MIPS32-NEXT: or $1, $1, $2
; MIPS32-NEXT: srl $2, $5, 8
; MIPS32-NEXT: andi $2, $2, 65280
; MIPS32-NEXT: or $2, $1, $2
-; MIPS32-NEXT: sll $3, $4, 24
-; MIPS32-NEXT: srl $1, $4, 24
-; MIPS32-NEXT: or $1, $1, $3
+; MIPS32-NEXT: sll $1, $4, 24
+; MIPS32-NEXT: srl $3, $4, 24
+; MIPS32-NEXT: or $1, $3, $1
; MIPS32-NEXT: andi $3, $4, 65280
; MIPS32-NEXT: sll $3, $3, 8
; MIPS32-NEXT: or $1, $1, $3
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
index 0312f49fa6ee..f7952e446236 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
@@ -29,10 +29,11 @@ define i32 @call_global(i32 %a0, i32 %a1, i32 %x, i32 %y) {
; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24
; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32_PIC-NEXT: .cfi_offset 31, -4
-; MIPS32_PIC-NEXT: addu $gp, $2, $25
+; MIPS32_PIC-NEXT: addu $1, $2, $25
+; MIPS32_PIC-NEXT: lw $25, %call16(f)($1)
; MIPS32_PIC-NEXT: move $4, $6
; MIPS32_PIC-NEXT: move $5, $7
-; MIPS32_PIC-NEXT: lw $25, %call16(f)($gp)
+; MIPS32_PIC-NEXT: move $gp, $1
; MIPS32_PIC-NEXT: jalr $25
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: addu $2, $2, $2
@@ -88,11 +89,12 @@ define i32 @call_global_with_local_linkage(i32 %a0, i32 %a1, i32 %x, i32 %y) {
; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24
; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32_PIC-NEXT: .cfi_offset 31, -4
-; MIPS32_PIC-NEXT: addu $gp, $2, $25
+; MIPS32_PIC-NEXT: addu $1, $2, $25
+; MIPS32_PIC-NEXT: lw $2, %got(f_with_local_linkage)($1)
+; MIPS32_PIC-NEXT: addiu $25, $2, %lo(f_with_local_linkage)
; MIPS32_PIC-NEXT: move $4, $6
; MIPS32_PIC-NEXT: move $5, $7
-; MIPS32_PIC-NEXT: lw $1, %got(f_with_local_linkage)($gp)
-; MIPS32_PIC-NEXT: addiu $25, $1, %lo(f_with_local_linkage)
+; MIPS32_PIC-NEXT: move $gp, $1
; MIPS32_PIC-NEXT: jalr $25
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: addu $2, $2, $2
@@ -113,9 +115,10 @@ define i32 @call_reg(i32 (i32, i32)* %f_ptr, i32 %x, i32 %y) {
; MIPS32-NEXT: .cfi_def_cfa_offset 24
; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: .cfi_offset 31, -4
-; MIPS32-NEXT: move $25, $4
+; MIPS32-NEXT: sw $4, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: move $4, $5
; MIPS32-NEXT: move $5, $6
+; MIPS32-NEXT: lw $25, 16($sp) # 4-byte Folded Reload
; MIPS32-NEXT: jalr $25
; MIPS32-NEXT: nop
; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
@@ -129,9 +132,10 @@ define i32 @call_reg(i32 (i32, i32)* %f_ptr, i32 %x, i32 %y) {
; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24
; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32_PIC-NEXT: .cfi_offset 31, -4
-; MIPS32_PIC-NEXT: move $25, $4
+; MIPS32_PIC-NEXT: sw $4, 16($sp) # 4-byte Folded Spill
; MIPS32_PIC-NEXT: move $4, $5
; MIPS32_PIC-NEXT: move $5, $6
+; MIPS32_PIC-NEXT: lw $25, 16($sp) # 4-byte Folded Reload
; MIPS32_PIC-NEXT: jalr $25
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
index 65fce9d4f5d5..4030cfbf57e6 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
@@ -17,14 +17,14 @@ declare i32 @llvm.ctlz.i32(i32, i1 immarg)
define i64 @ctlz_i64(i64 %a) {
; MIPS32-LABEL: ctlz_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $1, $4
; MIPS32-NEXT: ori $3, $zero, 0
-; MIPS32-NEXT: sltiu $4, $5, 1
-; MIPS32-NEXT: clz $1, $1
-; MIPS32-NEXT: addiu $1, $1, 32
-; MIPS32-NEXT: clz $2, $5
-; MIPS32-NEXT: andi $4, $4, 1
-; MIPS32-NEXT: movn $2, $1, $4
+; MIPS32-NEXT: sltiu $1, $5, 1
+; MIPS32-NEXT: clz $2, $4
+; MIPS32-NEXT: addiu $2, $2, 32
+; MIPS32-NEXT: clz $4, $5
+; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: movn $4, $2, $1
+; MIPS32-NEXT: move $2, $4
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll
index 7ac9c4332fed..5d7a2f23eac1 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll
@@ -8,15 +8,15 @@ define i32 @ctpop_i32(i32 %a) {
; MIPS32-NEXT: lui $2, 21845
; MIPS32-NEXT: ori $2, $2, 21845
; MIPS32-NEXT: and $1, $1, $2
-; MIPS32-NEXT: subu $2, $4, $1
-; MIPS32-NEXT: srl $1, $2, 2
+; MIPS32-NEXT: subu $1, $4, $1
+; MIPS32-NEXT: srl $2, $1, 2
; MIPS32-NEXT: lui $3, 13107
; MIPS32-NEXT: ori $3, $3, 13107
-; MIPS32-NEXT: and $1, $1, $3
; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: addu $2, $1, $2
-; MIPS32-NEXT: srl $1, $2, 4
-; MIPS32-NEXT: addu $1, $1, $2
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: srl $2, $1, 4
+; MIPS32-NEXT: addu $1, $2, $1
; MIPS32-NEXT: lui $2, 3855
; MIPS32-NEXT: ori $2, $2, 3855
; MIPS32-NEXT: and $1, $1, $2
@@ -38,37 +38,37 @@ define i64 @ctpop_i64(i64 %a) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: srl $1, $4, 1
; MIPS32-NEXT: lui $2, 21845
-; MIPS32-NEXT: ori $7, $2, 21845
-; MIPS32-NEXT: and $1, $1, $7
-; MIPS32-NEXT: subu $2, $4, $1
-; MIPS32-NEXT: srl $1, $2, 2
-; MIPS32-NEXT: lui $3, 13107
-; MIPS32-NEXT: ori $6, $3, 13107
-; MIPS32-NEXT: and $1, $1, $6
-; MIPS32-NEXT: and $2, $2, $6
-; MIPS32-NEXT: addu $2, $1, $2
-; MIPS32-NEXT: srl $1, $2, 4
-; MIPS32-NEXT: addu $1, $1, $2
-; MIPS32-NEXT: lui $2, 3855
-; MIPS32-NEXT: ori $4, $2, 3855
-; MIPS32-NEXT: and $1, $1, $4
-; MIPS32-NEXT: lui $2, 257
-; MIPS32-NEXT: ori $3, $2, 257
-; MIPS32-NEXT: mul $1, $1, $3
-; MIPS32-NEXT: srl $2, $1, 24
-; MIPS32-NEXT: srl $1, $5, 1
-; MIPS32-NEXT: and $1, $1, $7
-; MIPS32-NEXT: subu $5, $5, $1
-; MIPS32-NEXT: srl $1, $5, 2
-; MIPS32-NEXT: and $1, $1, $6
-; MIPS32-NEXT: and $5, $5, $6
-; MIPS32-NEXT: addu $5, $1, $5
-; MIPS32-NEXT: srl $1, $5, 4
-; MIPS32-NEXT: addu $1, $1, $5
+; MIPS32-NEXT: ori $2, $2, 21845
+; MIPS32-NEXT: and $1, $1, $2
+; MIPS32-NEXT: subu $1, $4, $1
+; MIPS32-NEXT: srl $3, $1, 2
+; MIPS32-NEXT: lui $4, 13107
+; MIPS32-NEXT: ori $4, $4, 13107
+; MIPS32-NEXT: and $3, $3, $4
; MIPS32-NEXT: and $1, $1, $4
-; MIPS32-NEXT: mul $1, $1, $3
+; MIPS32-NEXT: addu $1, $3, $1
+; MIPS32-NEXT: srl $3, $1, 4
+; MIPS32-NEXT: addu $1, $3, $1
+; MIPS32-NEXT: lui $3, 3855
+; MIPS32-NEXT: ori $3, $3, 3855
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: lui $6, 257
+; MIPS32-NEXT: ori $6, $6, 257
+; MIPS32-NEXT: mul $1, $1, $6
; MIPS32-NEXT: srl $1, $1, 24
-; MIPS32-NEXT: addu $2, $1, $2
+; MIPS32-NEXT: srl $7, $5, 1
+; MIPS32-NEXT: and $2, $7, $2
+; MIPS32-NEXT: subu $2, $5, $2
+; MIPS32-NEXT: srl $5, $2, 2
+; MIPS32-NEXT: and $5, $5, $4
+; MIPS32-NEXT: and $2, $2, $4
+; MIPS32-NEXT: addu $2, $5, $2
+; MIPS32-NEXT: srl $4, $2, 4
+; MIPS32-NEXT: addu $2, $4, $2
+; MIPS32-NEXT: and $2, $2, $3
+; MIPS32-NEXT: mul $2, $2, $6
+; MIPS32-NEXT: srl $2, $2, 24
+; MIPS32-NEXT: addu $2, $2, $1
; MIPS32-NEXT: ori $3, $zero, 0
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
index 44a2e619f715..3ea5329da548 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
@@ -6,10 +6,10 @@ define i32 @cttz_i32(i32 %a) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: not $1, $4
; MIPS32-NEXT: addiu $2, $4, -1
-; MIPS32-NEXT: and $2, $1, $2
-; MIPS32-NEXT: ori $1, $zero, 32
-; MIPS32-NEXT: clz $2, $2
-; MIPS32-NEXT: subu $2, $1, $2
+; MIPS32-NEXT: and $1, $1, $2
+; MIPS32-NEXT: ori $2, $zero, 32
+; MIPS32-NEXT: clz $1, $1
+; MIPS32-NEXT: subu $2, $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -21,23 +21,23 @@ declare i32 @llvm.cttz.i32(i32, i1 immarg)
define i64 @cttz_i64(i64 %a) {
; MIPS32-LABEL: cttz_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $6, $4
; MIPS32-NEXT: ori $3, $zero, 0
-; MIPS32-NEXT: sltiu $4, $6, 1
-; MIPS32-NEXT: not $1, $5
-; MIPS32-NEXT: addiu $2, $5, -1
-; MIPS32-NEXT: and $1, $1, $2
-; MIPS32-NEXT: ori $2, $zero, 32
-; MIPS32-NEXT: clz $1, $1
-; MIPS32-NEXT: subu $1, $2, $1
-; MIPS32-NEXT: addiu $1, $1, 32
-; MIPS32-NEXT: not $5, $6
-; MIPS32-NEXT: addiu $6, $6, -1
-; MIPS32-NEXT: and $5, $5, $6
-; MIPS32-NEXT: clz $5, $5
-; MIPS32-NEXT: subu $2, $2, $5
-; MIPS32-NEXT: andi $4, $4, 1
-; MIPS32-NEXT: movn $2, $1, $4
+; MIPS32-NEXT: sltiu $1, $4, 1
+; MIPS32-NEXT: not $2, $5
+; MIPS32-NEXT: addiu $5, $5, -1
+; MIPS32-NEXT: and $2, $2, $5
+; MIPS32-NEXT: ori $5, $zero, 32
+; MIPS32-NEXT: clz $2, $2
+; MIPS32-NEXT: subu $2, $5, $2
+; MIPS32-NEXT: addiu $2, $2, 32
+; MIPS32-NEXT: not $6, $4
+; MIPS32-NEXT: addiu $4, $4, -1
+; MIPS32-NEXT: and $4, $6, $4
+; MIPS32-NEXT: clz $4, $4
+; MIPS32-NEXT: subu $4, $5, $4
+; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: movn $4, $2, $1
+; MIPS32-NEXT: move $2, $4
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -53,10 +53,10 @@ define i32 @ffs_i32_expansion(i32 %a) {
; MIPS32-NEXT: ori $1, $zero, 0
; MIPS32-NEXT: not $2, $4
; MIPS32-NEXT: addiu $3, $4, -1
-; MIPS32-NEXT: and $3, $2, $3
-; MIPS32-NEXT: ori $2, $zero, 32
-; MIPS32-NEXT: clz $3, $3
-; MIPS32-NEXT: subu $2, $2, $3
+; MIPS32-NEXT: and $2, $2, $3
+; MIPS32-NEXT: ori $3, $zero, 32
+; MIPS32-NEXT: clz $2, $2
+; MIPS32-NEXT: subu $2, $3, $2
; MIPS32-NEXT: addiu $2, $2, 1
; MIPS32-NEXT: sltiu $3, $4, 1
; MIPS32-NEXT: andi $3, $3, 1
@@ -74,35 +74,37 @@ entry:
define i64 @ffs_i64_expansion(i64 %a) {
; MIPS32-LABEL: ffs_i64_expansion:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: ori $3, $zero, 1
-; MIPS32-NEXT: ori $1, $zero, 0
-; MIPS32-NEXT: sltiu $7, $4, 1
-; MIPS32-NEXT: not $2, $5
-; MIPS32-NEXT: addiu $6, $5, -1
-; MIPS32-NEXT: and $6, $2, $6
-; MIPS32-NEXT: ori $2, $zero, 32
+; MIPS32-NEXT: ori $1, $zero, 1
+; MIPS32-NEXT: ori $2, $zero, 0
+; MIPS32-NEXT: sltiu $3, $4, 1
+; MIPS32-NEXT: not $6, $5
+; MIPS32-NEXT: addiu $7, $5, -1
+; MIPS32-NEXT: and $6, $6, $7
+; MIPS32-NEXT: ori $7, $zero, 32
; MIPS32-NEXT: clz $6, $6
-; MIPS32-NEXT: subu $6, $2, $6
+; MIPS32-NEXT: subu $6, $7, $6
; MIPS32-NEXT: addiu $6, $6, 32
; MIPS32-NEXT: not $8, $4
; MIPS32-NEXT: addiu $9, $4, -1
; MIPS32-NEXT: and $8, $8, $9
; MIPS32-NEXT: clz $8, $8
-; MIPS32-NEXT: subu $2, $2, $8
-; MIPS32-NEXT: andi $7, $7, 1
-; MIPS32-NEXT: movn $2, $6, $7
-; MIPS32-NEXT: addiu $2, $2, 1
-; MIPS32-NEXT: sltu $6, $2, $3
-; MIPS32-NEXT: addiu $3, $1, 0
-; MIPS32-NEXT: andi $6, $6, 1
-; MIPS32-NEXT: addu $3, $3, $6
+; MIPS32-NEXT: subu $7, $7, $8
+; MIPS32-NEXT: andi $3, $3, 1
+; MIPS32-NEXT: movn $7, $6, $3
+; MIPS32-NEXT: addiu $3, $7, 1
+; MIPS32-NEXT: sltu $1, $3, $1
+; MIPS32-NEXT: addiu $6, $2, 0
+; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: addu $1, $6, $1
; MIPS32-NEXT: xori $4, $4, 0
; MIPS32-NEXT: xori $5, $5, 0
; MIPS32-NEXT: or $4, $4, $5
; MIPS32-NEXT: sltiu $4, $4, 1
; MIPS32-NEXT: andi $4, $4, 1
-; MIPS32-NEXT: movn $2, $1, $4
-; MIPS32-NEXT: movn $3, $1, $4
+; MIPS32-NEXT: movn $3, $2, $4
+; MIPS32-NEXT: movn $1, $2, $4
+; MIPS32-NEXT: move $2, $3
+; MIPS32-NEXT: move $3, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll
index 294bc71443ea..fcc2d6ef0a93 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll
@@ -15,32 +15,35 @@ define void @Print_c_N_times(i8 %c, i32 %N) {
; MIPS32-NEXT: .cfi_offset 30, -8
; MIPS32-NEXT: move $fp, $sp
; MIPS32-NEXT: .cfi_def_cfa_register 30
-; MIPS32-NEXT: sw $4, 8($fp) # 4-byte Folded Spill
-; MIPS32-NEXT: move $6, $5
-; MIPS32-NEXT: lw $5, 8($fp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $6, 12($fp) # 4-byte Folded Spill
-; MIPS32-NEXT: ori $2, $zero, 1
-; MIPS32-NEXT: ori $1, $zero, 0
-; MIPS32-NEXT: sw $1, 16($fp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $6, 1
-; MIPS32-NEXT: mul $1, $1, $2
+; MIPS32-NEXT: ori $1, $zero, 1
+; MIPS32-NEXT: ori $2, $zero, 0
+; MIPS32-NEXT: addiu $3, $5, 1
+; MIPS32-NEXT: mul $1, $3, $1
; MIPS32-NEXT: addiu $1, $1, 7
-; MIPS32-NEXT: addiu $2, $zero, 65528
-; MIPS32-NEXT: and $2, $1, $2
-; MIPS32-NEXT: move $1, $sp
-; MIPS32-NEXT: subu $4, $1, $2
-; MIPS32-NEXT: sw $4, 20($fp) # 4-byte Folded Spill
-; MIPS32-NEXT: move $sp, $4
+; MIPS32-NEXT: addiu $3, $zero, 65528
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: move $3, $sp
+; MIPS32-NEXT: subu $1, $3, $1
+; MIPS32-NEXT: move $sp, $1
; MIPS32-NEXT: addiu $sp, $sp, -16
+; MIPS32-NEXT: sw $4, 20($fp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $4, $1
+; MIPS32-NEXT: lw $3, 20($fp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $5, 16($fp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $5, $3
+; MIPS32-NEXT: lw $6, 16($fp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $2, 12($fp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $1, 8($fp) # 4-byte Folded Spill
; MIPS32-NEXT: jal memset
; MIPS32-NEXT: nop
-; MIPS32-NEXT: lw $5, 12($fp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 16($fp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $4, 20($fp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 16
-; MIPS32-NEXT: addu $2, $4, $5
-; MIPS32-NEXT: sb $1, 0($2)
+; MIPS32-NEXT: lw $1, 8($fp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 16($fp) # 4-byte Folded Reload
+; MIPS32-NEXT: addu $3, $1, $2
+; MIPS32-NEXT: lw $4, 12($fp) # 4-byte Folded Reload
+; MIPS32-NEXT: sb $4, 0($3)
; MIPS32-NEXT: addiu $sp, $sp, -16
+; MIPS32-NEXT: move $4, $1
; MIPS32-NEXT: jal puts
; MIPS32-NEXT: nop
; MIPS32-NEXT: addiu $sp, $sp, 16
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
index bfff4e72d0ab..58d5c8a160a6 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
@@ -27,9 +27,10 @@ entry:
define i1 @uno_s(float %x, float %y) {
; MIPS32-LABEL: uno_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.un.s $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -39,9 +40,10 @@ entry:
define i1 @ord_s(float %x, float %y) {
; MIPS32-LABEL: ord_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.un.s $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -53,9 +55,10 @@ entry:
define i1 @oeq_s(float %x, float %y) {
; MIPS32-LABEL: oeq_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.eq.s $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -65,9 +68,10 @@ entry:
define i1 @une_s(float %x, float %y) {
; MIPS32-LABEL: une_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.eq.s $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -79,9 +83,10 @@ entry:
define i1 @ueq_s(float %x, float %y) {
; MIPS32-LABEL: ueq_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ueq.s $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -91,9 +96,10 @@ entry:
define i1 @one_s(float %x, float %y) {
; MIPS32-LABEL: one_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ueq.s $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -105,9 +111,10 @@ entry:
define i1 @olt_s(float %x, float %y) {
; MIPS32-LABEL: olt_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.olt.s $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -117,9 +124,10 @@ entry:
define i1 @uge_s(float %x, float %y) {
; MIPS32-LABEL: uge_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.olt.s $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -131,9 +139,10 @@ entry:
define i1 @ult_s(float %x, float %y) {
; MIPS32-LABEL: ult_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ult.s $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -143,9 +152,10 @@ entry:
define i1 @oge_s(float %x, float %y) {
; MIPS32-LABEL: oge_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ult.s $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -157,9 +167,10 @@ entry:
define i1 @ole_s(float %x, float %y) {
; MIPS32-LABEL: ole_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ole.s $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -169,9 +180,10 @@ entry:
define i1 @ugt_s(float %x, float %y) {
; MIPS32-LABEL: ugt_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ole.s $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -183,9 +195,10 @@ entry:
define i1 @ule_s(float %x, float %y) {
; MIPS32-LABEL: ule_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ule.s $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -195,9 +208,10 @@ entry:
define i1 @ogt_s(float %x, float %y) {
; MIPS32-LABEL: ogt_s:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ule.s $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -231,9 +245,10 @@ entry:
define i1 @uno_d(double %x, double %y) {
; MIPS32-LABEL: uno_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.un.d $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -243,9 +258,10 @@ entry:
define i1 @ord_d(double %x, double %y) {
; MIPS32-LABEL: ord_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.un.d $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -257,9 +273,10 @@ entry:
define i1 @oeq_d(double %x, double %y) {
; MIPS32-LABEL: oeq_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.eq.d $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -269,9 +286,10 @@ entry:
define i1 @une_d(double %x, double %y) {
; MIPS32-LABEL: une_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.eq.d $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -283,9 +301,10 @@ entry:
define i1 @ueq_d(double %x, double %y) {
; MIPS32-LABEL: ueq_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ueq.d $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -295,9 +314,10 @@ entry:
define i1 @one_d(double %x, double %y) {
; MIPS32-LABEL: one_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ueq.d $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -309,9 +329,10 @@ entry:
define i1 @olt_d(double %x, double %y) {
; MIPS32-LABEL: olt_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.olt.d $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -321,9 +342,10 @@ entry:
define i1 @uge_d(double %x, double %y) {
; MIPS32-LABEL: uge_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.olt.d $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -335,9 +357,10 @@ entry:
define i1 @ult_d(double %x, double %y) {
; MIPS32-LABEL: ult_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ult.d $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -347,9 +370,10 @@ entry:
define i1 @oge_d(double %x, double %y) {
; MIPS32-LABEL: oge_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ult.d $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -361,9 +385,10 @@ entry:
define i1 @ole_d(double %x, double %y) {
; MIPS32-LABEL: ole_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ole.d $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -373,9 +398,10 @@ entry:
define i1 @ugt_d(double %x, double %y) {
; MIPS32-LABEL: ugt_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ole.d $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -387,9 +413,10 @@ entry:
define i1 @ule_d(double %x, double %y) {
; MIPS32-LABEL: ule_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ule.d $f12, $f14
-; MIPS32-NEXT: movf $2, $zero, $fcc0
+; MIPS32-NEXT: movf $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -399,9 +426,10 @@ entry:
define i1 @ogt_d(double %x, double %y) {
; MIPS32-LABEL: ogt_d:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $2, $zero, 1
+; MIPS32-NEXT: addiu $1, $zero, 1
; MIPS32-NEXT: c.ule.d $f12, $f14
-; MIPS32-NEXT: movt $2, $zero, $fcc0
+; MIPS32-NEXT: movt $1, $zero, $fcc0
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll
index 85feeda82e25..f4ca9e5b5371 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll
@@ -18,22 +18,22 @@ define double @e_double_precision() {
; FP32-LABEL: e_double_precision:
; FP32: # %bb.0: # %entry
; FP32-NEXT: lui $1, 16389
-; FP32-NEXT: ori $2, $1, 48906
-; FP32-NEXT: lui $1, 35604
-; FP32-NEXT: ori $1, $1, 22377
-; FP32-NEXT: mtc1 $1, $f0
-; FP32-NEXT: mtc1 $2, $f1
+; FP32-NEXT: ori $1, $1, 48906
+; FP32-NEXT: lui $2, 35604
+; FP32-NEXT: ori $2, $2, 22377
+; FP32-NEXT: mtc1 $2, $f0
+; FP32-NEXT: mtc1 $1, $f1
; FP32-NEXT: jr $ra
; FP32-NEXT: nop
;
; FP64-LABEL: e_double_precision:
; FP64: # %bb.0: # %entry
; FP64-NEXT: lui $1, 16389
-; FP64-NEXT: ori $2, $1, 48906
-; FP64-NEXT: lui $1, 35604
-; FP64-NEXT: ori $1, $1, 22377
-; FP64-NEXT: mtc1 $1, $f0
-; FP64-NEXT: mthc1 $2, $f0
+; FP64-NEXT: ori $1, $1, 48906
+; FP64-NEXT: lui $2, 35604
+; FP64-NEXT: ori $2, $2, 22377
+; FP64-NEXT: mtc1 $2, $f0
+; FP64-NEXT: mthc1 $1, $f0
; FP64-NEXT: jr $ra
; FP64-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
index e9cc0b933f71..a98c6eb9fd6c 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
@@ -164,20 +164,20 @@ define zeroext i16 @f32tou16(float %a) {
; MIPS32-LABEL: f32tou16:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: trunc.w.s $f0, $f12
-; MIPS32-NEXT: mfc1 $2, $f0
-; MIPS32-NEXT: lui $1, 20224
-; MIPS32-NEXT: mtc1 $1, $f0
+; MIPS32-NEXT: mfc1 $1, $f0
+; MIPS32-NEXT: lui $2, 20224
+; MIPS32-NEXT: mtc1 $2, $f0
; MIPS32-NEXT: sub.s $f1, $f12, $f0
; MIPS32-NEXT: trunc.w.s $f1, $f1
-; MIPS32-NEXT: mfc1 $1, $f1
+; MIPS32-NEXT: mfc1 $2, $f1
; MIPS32-NEXT: lui $3, 32768
-; MIPS32-NEXT: xor $1, $1, $3
+; MIPS32-NEXT: xor $2, $2, $3
; MIPS32-NEXT: addiu $3, $zero, 1
; MIPS32-NEXT: c.ult.s $f12, $f0
; MIPS32-NEXT: movf $3, $zero, $fcc0
; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $1, $2, $3
-; MIPS32-NEXT: andi $2, $1, 65535
+; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: andi $2, $2, 65535
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -189,20 +189,20 @@ define zeroext i8 @f32tou8(float %a) {
; MIPS32-LABEL: f32tou8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: trunc.w.s $f0, $f12
-; MIPS32-NEXT: mfc1 $2, $f0
-; MIPS32-NEXT: lui $1, 20224
-; MIPS32-NEXT: mtc1 $1, $f0
+; MIPS32-NEXT: mfc1 $1, $f0
+; MIPS32-NEXT: lui $2, 20224
+; MIPS32-NEXT: mtc1 $2, $f0
; MIPS32-NEXT: sub.s $f1, $f12, $f0
; MIPS32-NEXT: trunc.w.s $f1, $f1
-; MIPS32-NEXT: mfc1 $1, $f1
+; MIPS32-NEXT: mfc1 $2, $f1
; MIPS32-NEXT: lui $3, 32768
-; MIPS32-NEXT: xor $1, $1, $3
+; MIPS32-NEXT: xor $2, $2, $3
; MIPS32-NEXT: addiu $3, $zero, 1
; MIPS32-NEXT: c.ult.s $f12, $f0
; MIPS32-NEXT: movf $3, $zero, $fcc0
; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $1, $2, $3
-; MIPS32-NEXT: andi $2, $1, 255
+; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: andi $2, $2, 255
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -233,10 +233,10 @@ define i32 @f64tou32(double %a) {
; FP32: # %bb.0: # %entry
; FP32-NEXT: trunc.w.d $f0, $f12
; FP32-NEXT: mfc1 $1, $f0
-; FP32-NEXT: lui $3, 16864
-; FP32-NEXT: ori $2, $zero, 0
-; FP32-NEXT: mtc1 $2, $f0
-; FP32-NEXT: mtc1 $3, $f1
+; FP32-NEXT: lui $2, 16864
+; FP32-NEXT: ori $3, $zero, 0
+; FP32-NEXT: mtc1 $3, $f0
+; FP32-NEXT: mtc1 $2, $f1
; FP32-NEXT: sub.d $f2, $f12, $f0
; FP32-NEXT: trunc.w.d $f2, $f2
; FP32-NEXT: mfc1 $2, $f2
@@ -254,10 +254,10 @@ define i32 @f64tou32(double %a) {
; FP64: # %bb.0: # %entry
; FP64-NEXT: trunc.w.d $f0, $f12
; FP64-NEXT: mfc1 $1, $f0
-; FP64-NEXT: lui $3, 16864
-; FP64-NEXT: ori $2, $zero, 0
-; FP64-NEXT: mtc1 $2, $f0
-; FP64-NEXT: mthc1 $3, $f0
+; FP64-NEXT: lui $2, 16864
+; FP64-NEXT: ori $3, $zero, 0
+; FP64-NEXT: mtc1 $3, $f0
+; FP64-NEXT: mthc1 $2, $f0
; FP64-NEXT: sub.d $f1, $f12, $f0
; FP64-NEXT: trunc.w.d $f1, $f1
; FP64-NEXT: mfc1 $2, $f1
@@ -279,44 +279,44 @@ define zeroext i16 @f64tou16(double %a) {
; FP32-LABEL: f64tou16:
; FP32: # %bb.0: # %entry
; FP32-NEXT: trunc.w.d $f0, $f12
-; FP32-NEXT: mfc1 $2, $f0
-; FP32-NEXT: lui $3, 16864
-; FP32-NEXT: ori $1, $zero, 0
-; FP32-NEXT: mtc1 $1, $f0
-; FP32-NEXT: mtc1 $3, $f1
+; FP32-NEXT: mfc1 $1, $f0
+; FP32-NEXT: lui $2, 16864
+; FP32-NEXT: ori $3, $zero, 0
+; FP32-NEXT: mtc1 $3, $f0
+; FP32-NEXT: mtc1 $2, $f1
; FP32-NEXT: sub.d $f2, $f12, $f0
; FP32-NEXT: trunc.w.d $f2, $f2
-; FP32-NEXT: mfc1 $1, $f2
+; FP32-NEXT: mfc1 $2, $f2
; FP32-NEXT: lui $3, 32768
-; FP32-NEXT: xor $1, $1, $3
+; FP32-NEXT: xor $2, $2, $3
; FP32-NEXT: addiu $3, $zero, 1
; FP32-NEXT: c.ult.d $f12, $f0
; FP32-NEXT: movf $3, $zero, $fcc0
; FP32-NEXT: andi $3, $3, 1
-; FP32-NEXT: movn $1, $2, $3
-; FP32-NEXT: andi $2, $1, 65535
+; FP32-NEXT: movn $2, $1, $3
+; FP32-NEXT: andi $2, $2, 65535
; FP32-NEXT: jr $ra
; FP32-NEXT: nop
;
; FP64-LABEL: f64tou16:
; FP64: # %bb.0: # %entry
; FP64-NEXT: trunc.w.d $f0, $f12
-; FP64-NEXT: mfc1 $2, $f0
-; FP64-NEXT: lui $3, 16864
-; FP64-NEXT: ori $1, $zero, 0
-; FP64-NEXT: mtc1 $1, $f0
-; FP64-NEXT: mthc1 $3, $f0
+; FP64-NEXT: mfc1 $1, $f0
+; FP64-NEXT: lui $2, 16864
+; FP64-NEXT: ori $3, $zero, 0
+; FP64-NEXT: mtc1 $3, $f0
+; FP64-NEXT: mthc1 $2, $f0
; FP64-NEXT: sub.d $f1, $f12, $f0
; FP64-NEXT: trunc.w.d $f1, $f1
-; FP64-NEXT: mfc1 $1, $f1
+; FP64-NEXT: mfc1 $2, $f1
; FP64-NEXT: lui $3, 32768
-; FP64-NEXT: xor $1, $1, $3
+; FP64-NEXT: xor $2, $2, $3
; FP64-NEXT: addiu $3, $zero, 1
; FP64-NEXT: c.ult.d $f12, $f0
; FP64-NEXT: movf $3, $zero, $fcc0
; FP64-NEXT: andi $3, $3, 1
-; FP64-NEXT: movn $1, $2, $3
-; FP64-NEXT: andi $2, $1, 65535
+; FP64-NEXT: movn $2, $1, $3
+; FP64-NEXT: andi $2, $2, 65535
; FP64-NEXT: jr $ra
; FP64-NEXT: nop
entry:
@@ -328,44 +328,44 @@ define zeroext i8 @f64tou8(double %a) {
; FP32-LABEL: f64tou8:
; FP32: # %bb.0: # %entry
; FP32-NEXT: trunc.w.d $f0, $f12
-; FP32-NEXT: mfc1 $2, $f0
-; FP32-NEXT: lui $3, 16864
-; FP32-NEXT: ori $1, $zero, 0
-; FP32-NEXT: mtc1 $1, $f0
-; FP32-NEXT: mtc1 $3, $f1
+; FP32-NEXT: mfc1 $1, $f0
+; FP32-NEXT: lui $2, 16864
+; FP32-NEXT: ori $3, $zero, 0
+; FP32-NEXT: mtc1 $3, $f0
+; FP32-NEXT: mtc1 $2, $f1
; FP32-NEXT: sub.d $f2, $f12, $f0
; FP32-NEXT: trunc.w.d $f2, $f2
-; FP32-NEXT: mfc1 $1, $f2
+; FP32-NEXT: mfc1 $2, $f2
; FP32-NEXT: lui $3, 32768
-; FP32-NEXT: xor $1, $1, $3
+; FP32-NEXT: xor $2, $2, $3
; FP32-NEXT: addiu $3, $zero, 1
; FP32-NEXT: c.ult.d $f12, $f0
; FP32-NEXT: movf $3, $zero, $fcc0
; FP32-NEXT: andi $3, $3, 1
-; FP32-NEXT: movn $1, $2, $3
-; FP32-NEXT: andi $2, $1, 255
+; FP32-NEXT: movn $2, $1, $3
+; FP32-NEXT: andi $2, $2, 255
; FP32-NEXT: jr $ra
; FP32-NEXT: nop
;
; FP64-LABEL: f64tou8:
; FP64: # %bb.0: # %entry
; FP64-NEXT: trunc.w.d $f0, $f12
-; FP64-NEXT: mfc1 $2, $f0
-; FP64-NEXT: lui $3, 16864
-; FP64-NEXT: ori $1, $zero, 0
-; FP64-NEXT: mtc1 $1, $f0
-; FP64-NEXT: mthc1 $3, $f0
+; FP64-NEXT: mfc1 $1, $f0
+; FP64-NEXT: lui $2, 16864
+; FP64-NEXT: ori $3, $zero, 0
+; FP64-NEXT: mtc1 $3, $f0
+; FP64-NEXT: mthc1 $2, $f0
; FP64-NEXT: sub.d $f1, $f12, $f0
; FP64-NEXT: trunc.w.d $f1, $f1
-; FP64-NEXT: mfc1 $1, $f1
+; FP64-NEXT: mfc1 $2, $f1
; FP64-NEXT: lui $3, 32768
-; FP64-NEXT: xor $1, $1, $3
+; FP64-NEXT: xor $2, $2, $3
; FP64-NEXT: addiu $3, $zero, 1
; FP64-NEXT: c.ult.d $f12, $f0
; FP64-NEXT: movf $3, $zero, $fcc0
; FP64-NEXT: andi $3, $3, 1
-; FP64-NEXT: movn $1, $2, $3
-; FP64-NEXT: andi $2, $1, 255
+; FP64-NEXT: movn $2, $1, $3
+; FP64-NEXT: andi $2, $2, 255
; FP64-NEXT: jr $ra
; FP64-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll
index a23ab7c3ca8f..6e7e56aaa1ba 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll
@@ -14,11 +14,12 @@ define i32 @main() {
; MIPS32-NEXT: addiu $4, $1, %lo($.str)
; MIPS32-NEXT: lui $1, 18838
; MIPS32-NEXT: ori $5, $1, 722
-; MIPS32-NEXT: ori $1, $zero, 0
-; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: ori $2, $zero, 0
+; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: jal printf
; MIPS32-NEXT: nop
-; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 24
; MIPS32-NEXT: jr $ra
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
index 8e8ca91eb9de..e293a565fc70 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
@@ -23,8 +23,9 @@ define i32 @call_global(i32 %a, i32 %b) {
; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24
; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32_PIC-NEXT: .cfi_offset 31, -4
-; MIPS32_PIC-NEXT: addu $gp, $2, $25
-; MIPS32_PIC-NEXT: lw $25, %call16(f)($gp)
+; MIPS32_PIC-NEXT: addu $1, $2, $25
+; MIPS32_PIC-NEXT: lw $25, %call16(f)($1)
+; MIPS32_PIC-NEXT: move $gp, $1
; MIPS32_PIC-NEXT: jalr $25
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
@@ -45,9 +46,10 @@ define i32 @call_global_with_local_linkage(i32 %a, i32 %b) {
; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24
; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32_PIC-NEXT: .cfi_offset 31, -4
-; MIPS32_PIC-NEXT: addu $gp, $2, $25
-; MIPS32_PIC-NEXT: lw $1, %got(f_with_local_linkage)($gp)
-; MIPS32_PIC-NEXT: addiu $25, $1, %lo(f_with_local_linkage)
+; MIPS32_PIC-NEXT: addu $1, $2, $25
+; MIPS32_PIC-NEXT: lw $2, %got(f_with_local_linkage)($1)
+; MIPS32_PIC-NEXT: addiu $25, $2, %lo(f_with_local_linkage)
+; MIPS32_PIC-NEXT: move $gp, $1
; MIPS32_PIC-NEXT: jalr $25
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
index a7e0d05544be..7eb952b47c56 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
@@ -188,12 +188,13 @@ entry:
define i1 @sgt_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: sgt_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: slt $2, $7, $5
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
-; MIPS32-NEXT: sltu $1, $6, $4
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: slt $1, $7, $5
+; MIPS32-NEXT: xor $2, $5, $7
+; MIPS32-NEXT: sltiu $2, $2, 1
+; MIPS32-NEXT: sltu $3, $6, $4
+; MIPS32-NEXT: andi $2, $2, 1
+; MIPS32-NEXT: movn $1, $3, $2
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -205,13 +206,14 @@ define i1 @sge_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: sge_i64:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: slt $1, $5, $7
-; MIPS32-NEXT: xori $2, $1, 1
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
-; MIPS32-NEXT: sltu $1, $4, $6
; MIPS32-NEXT: xori $1, $1, 1
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $2, $5, $7
+; MIPS32-NEXT: sltiu $2, $2, 1
+; MIPS32-NEXT: sltu $3, $4, $6
+; MIPS32-NEXT: xori $3, $3, 1
+; MIPS32-NEXT: andi $2, $2, 1
+; MIPS32-NEXT: movn $1, $3, $2
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -222,12 +224,13 @@ entry:
define i1 @slt_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: slt_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: slt $2, $5, $7
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
-; MIPS32-NEXT: sltu $1, $4, $6
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: slt $1, $5, $7
+; MIPS32-NEXT: xor $2, $5, $7
+; MIPS32-NEXT: sltiu $2, $2, 1
+; MIPS32-NEXT: sltu $3, $4, $6
+; MIPS32-NEXT: andi $2, $2, 1
+; MIPS32-NEXT: movn $1, $3, $2
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -239,13 +242,14 @@ define i1 @sle_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: sle_i64:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: slt $1, $7, $5
-; MIPS32-NEXT: xori $2, $1, 1
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
-; MIPS32-NEXT: sltu $1, $6, $4
; MIPS32-NEXT: xori $1, $1, 1
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $2, $5, $7
+; MIPS32-NEXT: sltiu $2, $2, 1
+; MIPS32-NEXT: sltu $3, $6, $4
+; MIPS32-NEXT: xori $3, $3, 1
+; MIPS32-NEXT: andi $2, $2, 1
+; MIPS32-NEXT: movn $1, $3, $2
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -256,12 +260,13 @@ entry:
define i1 @ugt_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: ugt_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: sltu $2, $7, $5
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
-; MIPS32-NEXT: sltu $1, $6, $4
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: sltu $1, $7, $5
+; MIPS32-NEXT: xor $2, $5, $7
+; MIPS32-NEXT: sltiu $2, $2, 1
+; MIPS32-NEXT: sltu $3, $6, $4
+; MIPS32-NEXT: andi $2, $2, 1
+; MIPS32-NEXT: movn $1, $3, $2
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -273,13 +278,14 @@ define i1 @uge_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: uge_i64:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: sltu $1, $5, $7
-; MIPS32-NEXT: xori $2, $1, 1
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
-; MIPS32-NEXT: sltu $1, $4, $6
; MIPS32-NEXT: xori $1, $1, 1
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $2, $5, $7
+; MIPS32-NEXT: sltiu $2, $2, 1
+; MIPS32-NEXT: sltu $3, $4, $6
+; MIPS32-NEXT: xori $3, $3, 1
+; MIPS32-NEXT: andi $2, $2, 1
+; MIPS32-NEXT: movn $1, $3, $2
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -290,12 +296,13 @@ entry:
define i1 @ult_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: ult_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: sltu $2, $5, $7
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
-; MIPS32-NEXT: sltu $1, $4, $6
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: sltu $1, $5, $7
+; MIPS32-NEXT: xor $2, $5, $7
+; MIPS32-NEXT: sltiu $2, $2, 1
+; MIPS32-NEXT: sltu $3, $4, $6
+; MIPS32-NEXT: andi $2, $2, 1
+; MIPS32-NEXT: movn $1, $3, $2
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -307,13 +314,14 @@ define i1 @ule_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: ule_i64:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: sltu $1, $7, $5
-; MIPS32-NEXT: xori $2, $1, 1
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
-; MIPS32-NEXT: sltu $1, $6, $4
; MIPS32-NEXT: xori $1, $1, 1
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $2, $5, $7
+; MIPS32-NEXT: sltiu $2, $2, 1
+; MIPS32-NEXT: sltu $3, $6, $4
+; MIPS32-NEXT: xori $3, $3, 1
+; MIPS32-NEXT: andi $2, $2, 1
+; MIPS32-NEXT: movn $1, $3, $2
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
index 804a14853bed..dcd6c76a8b2a 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
@@ -7,35 +7,35 @@ define i32 @mod4_0_to_11(i32 %a) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -32
; MIPS32-NEXT: .cfi_def_cfa_offset 32
-; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: ori $1, $zero, 7
; MIPS32-NEXT: ori $2, $zero, 3
-; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: ori $2, $zero, 2
-; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: ori $2, $zero, 1
-; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: ori $2, $zero, 0
-; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $2, $zero, 65535
-; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: ori $2, $zero, 0
-; MIPS32-NEXT: subu $2, $4, $2
-; MIPS32-NEXT: sw $2, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sltu $1, $1, $2
+; MIPS32-NEXT: ori $3, $zero, 2
+; MIPS32-NEXT: ori $5, $zero, 1
+; MIPS32-NEXT: ori $6, $zero, 0
+; MIPS32-NEXT: addiu $7, $zero, 65535
+; MIPS32-NEXT: ori $8, $zero, 0
+; MIPS32-NEXT: subu $8, $4, $8
+; MIPS32-NEXT: sltu $1, $1, $8
; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: sw $4, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $8, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $1, $BB0_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_1: # %entry
-; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lui $1, %hi($JTI0_0)
-; MIPS32-NEXT: sll $2, $2, 2
-; MIPS32-NEXT: addu $1, $1, $2
+; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sll $3, $2, 2
+; MIPS32-NEXT: addu $1, $1, $3
; MIPS32-NEXT: lw $1, %lo($JTI0_0)($1)
; MIPS32-NEXT: jr $1
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_2: # %sw.bb
-; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 32
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -45,37 +45,37 @@ define i32 @mod4_0_to_11(i32 %a) {
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_4: # %sw.bb2
-; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 32
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_5: # %sw.bb3
-; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 24($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 32
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_6: # %sw.default
; MIPS32-NEXT: .insn
; MIPS32-NEXT: # %bb.7: # %sw.epilog
-; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: ori $3, $zero, 8
-; MIPS32-NEXT: subu $2, $2, $3
-; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sltu $1, $1, $2
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB0_13
+; MIPS32-NEXT: ori $1, $zero, 8
+; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sltu $4, $3, $1
+; MIPS32-NEXT: andi $4, $4, 1
+; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $4, $BB0_13
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_8: # %sw.epilog
-; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lui $1, %hi($JTI0_1)
-; MIPS32-NEXT: sll $2, $2, 2
-; MIPS32-NEXT: addu $1, $1, $2
+; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sll $3, $2, 2
+; MIPS32-NEXT: addu $1, $1, $3
; MIPS32-NEXT: lw $1, %lo($JTI0_1)($1)
; MIPS32-NEXT: jr $1
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_9: # %sw.bb4
-; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 32
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -85,20 +85,35 @@ define i32 @mod4_0_to_11(i32 %a) {
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_11: # %sw.bb6
-; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 32
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_12: # %sw.bb7
-; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 24($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 32
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_13: # %sw.default8
-; MIPS32-NEXT: lw $2, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: addiu $sp, $sp, 32
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
+; MIPS32: $JTI0_0:
+; MIPS32-NEXT: .4byte ($BB0_2)
+; MIPS32-NEXT: .4byte ($BB0_3)
+; MIPS32-NEXT: .4byte ($BB0_4)
+; MIPS32-NEXT: .4byte ($BB0_5)
+; MIPS32-NEXT: .4byte ($BB0_2)
+; MIPS32-NEXT: .4byte ($BB0_3)
+; MIPS32-NEXT: .4byte ($BB0_4)
+; MIPS32-NEXT: .4byte ($BB0_5)
+; MIPS32-NEXT: $JTI0_1:
+; MIPS32-NEXT: .4byte ($BB0_9)
+; MIPS32-NEXT: .4byte ($BB0_10)
+; MIPS32-NEXT: .4byte ($BB0_11)
+; MIPS32-NEXT: .4byte ($BB0_12)
+
;
; MIPS32_PIC-LABEL: mod4_0_to_11:
; MIPS32_PIC: # %bb.0: # %entry
@@ -107,104 +122,117 @@ define i32 @mod4_0_to_11(i32 %a) {
; MIPS32_PIC-NEXT: addiu $sp, $sp, -40
; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 40
; MIPS32_PIC-NEXT: addu $1, $2, $25
-; MIPS32_PIC-NEXT: sw $1, 8($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT: sw $4, 12($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT: ori $1, $zero, 7
-; MIPS32_PIC-NEXT: ori $2, $zero, 3
-; MIPS32_PIC-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT: ori $2, $zero, 2
-; MIPS32_PIC-NEXT: sw $2, 20($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT: ori $2, $zero, 1
-; MIPS32_PIC-NEXT: sw $2, 24($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT: ori $2, $zero, 0
-; MIPS32_PIC-NEXT: sw $2, 28($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT: addiu $2, $zero, 65535
-; MIPS32_PIC-NEXT: sw $2, 32($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT: ori $2, $zero, 0
-; MIPS32_PIC-NEXT: subu $2, $4, $2
-; MIPS32_PIC-NEXT: sw $2, 36($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT: sltu $1, $1, $2
-; MIPS32_PIC-NEXT: andi $1, $1, 1
-; MIPS32_PIC-NEXT: bnez $1, $BB0_6
+; MIPS32_PIC-NEXT: ori $2, $zero, 7
+; MIPS32_PIC-NEXT: ori $3, $zero, 3
+; MIPS32_PIC-NEXT: ori $5, $zero, 2
+; MIPS32_PIC-NEXT: ori $6, $zero, 1
+; MIPS32_PIC-NEXT: ori $7, $zero, 0
+; MIPS32_PIC-NEXT: addiu $8, $zero, 65535
+; MIPS32_PIC-NEXT: ori $9, $zero, 0
+; MIPS32_PIC-NEXT: subu $9, $4, $9
+; MIPS32_PIC-NEXT: sltu $2, $2, $9
+; MIPS32_PIC-NEXT: andi $2, $2, 1
+; MIPS32_PIC-NEXT: sw $1, 36($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT: sw $4, 32($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT: sw $3, 28($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT: sw $5, 24($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT: sw $6, 20($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT: sw $7, 16($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT: sw $8, 12($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT: sw $9, 8($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT: bnez $2, $BB0_6
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_1: # %entry
-; MIPS32_PIC-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT: lw $3, 36($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT: lw $1, %got($JTI0_0)($2)
-; MIPS32_PIC-NEXT: sll $3, $3, 2
-; MIPS32_PIC-NEXT: addu $1, $1, $3
-; MIPS32_PIC-NEXT: lw $1, %lo($JTI0_0)($1)
-; MIPS32_PIC-NEXT: addu $1, $1, $2
-; MIPS32_PIC-NEXT: jr $1
+; MIPS32_PIC-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, %got($JTI0_0)($1)
+; MIPS32_PIC-NEXT: lw $3, 8($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: sll $4, $3, 2
+; MIPS32_PIC-NEXT: addu $2, $2, $4
+; MIPS32_PIC-NEXT: lw $2, %lo($JTI0_0)($2)
+; MIPS32_PIC-NEXT: addu $2, $2, $1
+; MIPS32_PIC-NEXT: jr $2
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_2: # %sw.bb
-; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload
; MIPS32_PIC-NEXT: addiu $sp, $sp, 40
; MIPS32_PIC-NEXT: jr $ra
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_3: # %sw.bb1
-; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32_PIC-NEXT: addiu $sp, $sp, 40
; MIPS32_PIC-NEXT: jr $ra
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_4: # %sw.bb2
-; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload
; MIPS32_PIC-NEXT: addiu $sp, $sp, 40
; MIPS32_PIC-NEXT: jr $ra
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_5: # %sw.bb3
-; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
; MIPS32_PIC-NEXT: addiu $sp, $sp, 40
; MIPS32_PIC-NEXT: jr $ra
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_6: # %sw.default
; MIPS32_PIC-NEXT: .insn
; MIPS32_PIC-NEXT: # %bb.7: # %sw.epilog
-; MIPS32_PIC-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT: ori $3, $zero, 8
-; MIPS32_PIC-NEXT: subu $2, $2, $3
-; MIPS32_PIC-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT: sltu $1, $1, $2
-; MIPS32_PIC-NEXT: andi $1, $1, 1
-; MIPS32_PIC-NEXT: bnez $1, $BB0_13
+; MIPS32_PIC-NEXT: ori $1, $zero, 8
+; MIPS32_PIC-NEXT: lw $2, 32($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: subu $1, $2, $1
+; MIPS32_PIC-NEXT: lw $3, 28($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: sltu $4, $3, $1
+; MIPS32_PIC-NEXT: andi $4, $4, 1
+; MIPS32_PIC-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT: bnez $4, $BB0_13
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_8: # %sw.epilog
-; MIPS32_PIC-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, %got($JTI0_1)($1)
; MIPS32_PIC-NEXT: lw $3, 4($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT: lw $1, %got($JTI0_1)($2)
-; MIPS32_PIC-NEXT: sll $3, $3, 2
-; MIPS32_PIC-NEXT: addu $1, $1, $3
-; MIPS32_PIC-NEXT: lw $1, %lo($JTI0_1)($1)
-; MIPS32_PIC-NEXT: addu $1, $1, $2
-; MIPS32_PIC-NEXT: jr $1
+; MIPS32_PIC-NEXT: sll $4, $3, 2
+; MIPS32_PIC-NEXT: addu $2, $2, $4
+; MIPS32_PIC-NEXT: lw $2, %lo($JTI0_1)($2)
+; MIPS32_PIC-NEXT: addu $2, $2, $1
+; MIPS32_PIC-NEXT: jr $2
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_9: # %sw.bb4
-; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload
; MIPS32_PIC-NEXT: addiu $sp, $sp, 40
; MIPS32_PIC-NEXT: jr $ra
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_10: # %sw.bb5
-; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32_PIC-NEXT: addiu $sp, $sp, 40
; MIPS32_PIC-NEXT: jr $ra
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_11: # %sw.bb6
-; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload
; MIPS32_PIC-NEXT: addiu $sp, $sp, 40
; MIPS32_PIC-NEXT: jr $ra
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_12: # %sw.bb7
-; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
; MIPS32_PIC-NEXT: addiu $sp, $sp, 40
; MIPS32_PIC-NEXT: jr $ra
; MIPS32_PIC-NEXT: nop
; MIPS32_PIC-NEXT: $BB0_13: # %sw.default8
-; MIPS32_PIC-NEXT: lw $2, 32($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS32_PIC-NEXT: addiu $sp, $sp, 40
; MIPS32_PIC-NEXT: jr $ra
; MIPS32_PIC-NEXT: nop
-
+; MIPS32_PIC: $JTI0_0:
+; MIPS32_PIC-NEXT: .gpword ($BB0_2)
+; MIPS32_PIC-NEXT: .gpword ($BB0_3)
+; MIPS32_PIC-NEXT: .gpword ($BB0_4)
+; MIPS32_PIC-NEXT: .gpword ($BB0_5)
+; MIPS32_PIC-NEXT: .gpword ($BB0_2)
+; MIPS32_PIC-NEXT: .gpword ($BB0_3)
+; MIPS32_PIC-NEXT: .gpword ($BB0_4)
+; MIPS32_PIC-NEXT: .gpword ($BB0_5)
+; MIPS32_PIC-NEXT: $JTI0_1:
+; MIPS32_PIC-NEXT: .gpword ($BB0_9)
+; MIPS32_PIC-NEXT: .gpword ($BB0_10)
+; MIPS32_PIC-NEXT: .gpword ($BB0_11)
+; MIPS32_PIC-NEXT: .gpword ($BB0_12)
entry:
switch i32 %a, label %sw.default [
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll
index 90043c0e9a12..318407d619f5 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll
@@ -15,11 +15,11 @@ define float @load_float_align1() {
; MIPS32-LABEL: load_float_align1:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $1, %hi(float_align1)
-; MIPS32-NEXT: addiu $2, $1, %lo(float_align1)
-; MIPS32-NEXT: # implicit-def: $at
-; MIPS32-NEXT: lwl $1, 3($2)
-; MIPS32-NEXT: lwr $1, 0($2)
-; MIPS32-NEXT: mtc1 $1, $f0
+; MIPS32-NEXT: addiu $1, $1, %lo(float_align1)
+; MIPS32-NEXT: # implicit-def: $v0
+; MIPS32-NEXT: lwl $2, 3($1)
+; MIPS32-NEXT: lwr $2, 0($1)
+; MIPS32-NEXT: mtc1 $2, $f0
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
@@ -38,11 +38,11 @@ define float @load_float_align2() {
; MIPS32-LABEL: load_float_align2:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $1, %hi(float_align2)
-; MIPS32-NEXT: addiu $2, $1, %lo(float_align2)
-; MIPS32-NEXT: # implicit-def: $at
-; MIPS32-NEXT: lwl $1, 3($2)
-; MIPS32-NEXT: lwr $1, 0($2)
-; MIPS32-NEXT: mtc1 $1, $f0
+; MIPS32-NEXT: addiu $1, $1, %lo(float_align2)
+; MIPS32-NEXT: # implicit-def: $v0
+; MIPS32-NEXT: lwl $2, 3($1)
+; MIPS32-NEXT: lwr $2, 0($1)
+; MIPS32-NEXT: mtc1 $2, $f0
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll
index a2afbf1c637e..c7a70d56f8a0 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll
@@ -131,23 +131,25 @@ entry:
define i64 @load5align1(%struct.MemSize5_Align1* %S) {
; MIPS32-LABEL: load5align1:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: # implicit-def: $v0
-; MIPS32-NEXT: lwl $2, 3($4)
-; MIPS32-NEXT: lwr $2, 0($4)
-; MIPS32-NEXT: lbu $1, 4($4)
+; MIPS32-NEXT: # implicit-def: $at
+; MIPS32-NEXT: lwl $1, 3($4)
+; MIPS32-NEXT: lwr $1, 0($4)
+; MIPS32-NEXT: lbu $2, 4($4)
; MIPS32-NEXT: addiu $3, $zero, 65535
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: andi $3, $1, 255
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: andi $3, $2, 255
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load5align1:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lbu $1, 4($4)
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lbu $2, 4($4)
; MIPS32R6-NEXT: addiu $3, $zero, 65535
-; MIPS32R6-NEXT: and $2, $2, $3
-; MIPS32R6-NEXT: andi $3, $1, 255
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: andi $3, $2, 255
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize5_Align1* %S to i40*
@@ -159,23 +161,25 @@ entry:
define i64 @load5align2(%struct.MemSize5_Align2* %S) {
; MIPS32-LABEL: load5align2:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: # implicit-def: $v0
-; MIPS32-NEXT: lwl $2, 3($4)
-; MIPS32-NEXT: lwr $2, 0($4)
-; MIPS32-NEXT: lbu $1, 4($4)
+; MIPS32-NEXT: # implicit-def: $at
+; MIPS32-NEXT: lwl $1, 3($4)
+; MIPS32-NEXT: lwr $1, 0($4)
+; MIPS32-NEXT: lbu $2, 4($4)
; MIPS32-NEXT: addiu $3, $zero, 65535
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: andi $3, $1, 255
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: andi $3, $2, 255
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load5align2:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lbu $1, 4($4)
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lbu $2, 4($4)
; MIPS32R6-NEXT: addiu $3, $zero, 65535
-; MIPS32R6-NEXT: and $2, $2, $3
-; MIPS32R6-NEXT: andi $3, $1, 255
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: andi $3, $2, 255
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize5_Align2* %S to i40*
@@ -187,21 +191,23 @@ entry:
define i64 @load5align4(%struct.MemSize5_Align4* %S) {
; MIPS32-LABEL: load5align4:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: lw $2, 0($4)
-; MIPS32-NEXT: lbu $1, 4($4)
+; MIPS32-NEXT: lw $1, 0($4)
+; MIPS32-NEXT: lbu $2, 4($4)
; MIPS32-NEXT: addiu $3, $zero, 65535
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: andi $3, $1, 255
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: andi $3, $2, 255
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load5align4:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lbu $1, 4($4)
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lbu $2, 4($4)
; MIPS32R6-NEXT: addiu $3, $zero, 65535
-; MIPS32R6-NEXT: and $2, $2, $3
-; MIPS32R6-NEXT: andi $3, $1, 255
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: andi $3, $2, 255
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize5_Align4* %S to i40*
@@ -213,21 +219,23 @@ entry:
define i64 @load5align8(%struct.MemSize5_Align8* %S) {
; MIPS32-LABEL: load5align8:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: lw $2, 0($4)
-; MIPS32-NEXT: lbu $1, 4($4)
+; MIPS32-NEXT: lw $1, 0($4)
+; MIPS32-NEXT: lbu $2, 4($4)
; MIPS32-NEXT: addiu $3, $zero, 65535
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: andi $3, $1, 255
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: andi $3, $2, 255
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load5align8:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lbu $1, 4($4)
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lbu $2, 4($4)
; MIPS32R6-NEXT: addiu $3, $zero, 65535
-; MIPS32R6-NEXT: and $2, $2, $3
-; MIPS32R6-NEXT: andi $3, $1, 255
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: andi $3, $2, 255
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize5_Align8* %S to i40*
@@ -239,25 +247,27 @@ entry:
define i64 @load6align1(%struct.MemSize6_Align1* %S) {
; MIPS32-LABEL: load6align1:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: # implicit-def: $v0
-; MIPS32-NEXT: lwl $2, 3($4)
-; MIPS32-NEXT: lwr $2, 0($4)
; MIPS32-NEXT: # implicit-def: $at
-; MIPS32-NEXT: lwl $1, 7($4)
-; MIPS32-NEXT: lwr $1, 4($4)
+; MIPS32-NEXT: lwl $1, 3($4)
+; MIPS32-NEXT: lwr $1, 0($4)
+; MIPS32-NEXT: # implicit-def: $v0
+; MIPS32-NEXT: lwl $2, 7($4)
+; MIPS32-NEXT: lwr $2, 4($4)
; MIPS32-NEXT: addiu $3, $zero, 65535
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: andi $3, $1, 65535
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: andi $3, $2, 65535
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load6align1:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lhu $1, 4($4)
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lhu $2, 4($4)
; MIPS32R6-NEXT: addiu $3, $zero, 65535
-; MIPS32R6-NEXT: and $2, $2, $3
-; MIPS32R6-NEXT: andi $3, $1, 65535
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: andi $3, $2, 65535
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize6_Align1* %S to i48*
@@ -269,23 +279,25 @@ entry:
define i64 @load6align2(%struct.MemSize6_Align2* %S) {
; MIPS32-LABEL: load6align2:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: # implicit-def: $v0
-; MIPS32-NEXT: lwl $2, 3($4)
-; MIPS32-NEXT: lwr $2, 0($4)
-; MIPS32-NEXT: lhu $1, 4($4)
+; MIPS32-NEXT: # implicit-def: $at
+; MIPS32-NEXT: lwl $1, 3($4)
+; MIPS32-NEXT: lwr $1, 0($4)
+; MIPS32-NEXT: lhu $2, 4($4)
; MIPS32-NEXT: addiu $3, $zero, 65535
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: andi $3, $1, 65535
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: andi $3, $2, 65535
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load6align2:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lhu $1, 4($4)
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lhu $2, 4($4)
; MIPS32R6-NEXT: addiu $3, $zero, 65535
-; MIPS32R6-NEXT: and $2, $2, $3
-; MIPS32R6-NEXT: andi $3, $1, 65535
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: andi $3, $2, 65535
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize6_Align2* %S to i48*
@@ -297,21 +309,23 @@ entry:
define i64 @load6align4(%struct.MemSize6_Align4* %S) {
; MIPS32-LABEL: load6align4:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: lw $2, 0($4)
-; MIPS32-NEXT: lhu $1, 4($4)
+; MIPS32-NEXT: lw $1, 0($4)
+; MIPS32-NEXT: lhu $2, 4($4)
; MIPS32-NEXT: addiu $3, $zero, 65535
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: andi $3, $1, 65535
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: andi $3, $2, 65535
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load6align4:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lhu $1, 4($4)
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lhu $2, 4($4)
; MIPS32R6-NEXT: addiu $3, $zero, 65535
-; MIPS32R6-NEXT: and $2, $2, $3
-; MIPS32R6-NEXT: andi $3, $1, 65535
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: andi $3, $2, 65535
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize6_Align4* %S to i48*
@@ -323,21 +337,23 @@ entry:
define i64 @load6align8(%struct.MemSize6_Align8* %S) {
; MIPS32-LABEL: load6align8:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: lw $2, 0($4)
-; MIPS32-NEXT: lhu $1, 4($4)
+; MIPS32-NEXT: lw $1, 0($4)
+; MIPS32-NEXT: lhu $2, 4($4)
; MIPS32-NEXT: addiu $3, $zero, 65535
-; MIPS32-NEXT: and $2, $2, $3
-; MIPS32-NEXT: andi $3, $1, 65535
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: andi $3, $2, 65535
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load6align8:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lhu $1, 4($4)
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lhu $2, 4($4)
; MIPS32R6-NEXT: addiu $3, $zero, 65535
-; MIPS32R6-NEXT: and $2, $2, $3
-; MIPS32R6-NEXT: andi $3, $1, 65535
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: andi $3, $2, 65535
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize6_Align8* %S to i48*
@@ -349,29 +365,31 @@ entry:
define i64 @load7align1(%struct.MemSize7_Align1* %S) {
; MIPS32-LABEL: load7align1:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: # implicit-def: $v0
-; MIPS32-NEXT: lwl $2, 3($4)
-; MIPS32-NEXT: lwr $2, 0($4)
; MIPS32-NEXT: # implicit-def: $at
-; MIPS32-NEXT: lwl $1, 7($4)
-; MIPS32-NEXT: lwr $1, 4($4)
-; MIPS32-NEXT: addiu $4, $zero, 65535
-; MIPS32-NEXT: lui $3, 255
-; MIPS32-NEXT: ori $3, $3, 65535
-; MIPS32-NEXT: and $2, $2, $4
-; MIPS32-NEXT: and $3, $1, $3
+; MIPS32-NEXT: lwl $1, 3($4)
+; MIPS32-NEXT: lwr $1, 0($4)
+; MIPS32-NEXT: # implicit-def: $v0
+; MIPS32-NEXT: lwl $2, 7($4)
+; MIPS32-NEXT: lwr $2, 4($4)
+; MIPS32-NEXT: addiu $3, $zero, 65535
+; MIPS32-NEXT: lui $4, 255
+; MIPS32-NEXT: ori $4, $4, 65535
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: and $3, $2, $4
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load7align1:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lw $1, 4($4)
-; MIPS32R6-NEXT: addiu $4, $zero, 65535
-; MIPS32R6-NEXT: lui $3, 255
-; MIPS32R6-NEXT: ori $3, $3, 65535
-; MIPS32R6-NEXT: and $2, $2, $4
-; MIPS32R6-NEXT: and $3, $1, $3
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lw $2, 4($4)
+; MIPS32R6-NEXT: addiu $3, $zero, 65535
+; MIPS32R6-NEXT: lui $4, 255
+; MIPS32R6-NEXT: ori $4, $4, 65535
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: and $3, $2, $4
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize7_Align1* %S to i56*
@@ -383,29 +401,31 @@ entry:
define i64 @load7align2(%struct.MemSize7_Align2* %S) {
; MIPS32-LABEL: load7align2:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: # implicit-def: $v0
-; MIPS32-NEXT: lwl $2, 3($4)
-; MIPS32-NEXT: lwr $2, 0($4)
; MIPS32-NEXT: # implicit-def: $at
-; MIPS32-NEXT: lwl $1, 7($4)
-; MIPS32-NEXT: lwr $1, 4($4)
-; MIPS32-NEXT: addiu $4, $zero, 65535
-; MIPS32-NEXT: lui $3, 255
-; MIPS32-NEXT: ori $3, $3, 65535
-; MIPS32-NEXT: and $2, $2, $4
-; MIPS32-NEXT: and $3, $1, $3
+; MIPS32-NEXT: lwl $1, 3($4)
+; MIPS32-NEXT: lwr $1, 0($4)
+; MIPS32-NEXT: # implicit-def: $v0
+; MIPS32-NEXT: lwl $2, 7($4)
+; MIPS32-NEXT: lwr $2, 4($4)
+; MIPS32-NEXT: addiu $3, $zero, 65535
+; MIPS32-NEXT: lui $4, 255
+; MIPS32-NEXT: ori $4, $4, 65535
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: and $3, $2, $4
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load7align2:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lw $1, 4($4)
-; MIPS32R6-NEXT: addiu $4, $zero, 65535
-; MIPS32R6-NEXT: lui $3, 255
-; MIPS32R6-NEXT: ori $3, $3, 65535
-; MIPS32R6-NEXT: and $2, $2, $4
-; MIPS32R6-NEXT: and $3, $1, $3
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lw $2, 4($4)
+; MIPS32R6-NEXT: addiu $3, $zero, 65535
+; MIPS32R6-NEXT: lui $4, 255
+; MIPS32R6-NEXT: ori $4, $4, 65535
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: and $3, $2, $4
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize7_Align2* %S to i56*
@@ -417,25 +437,27 @@ entry:
define i64 @load7align4(%struct.MemSize7_Align4* %S) {
; MIPS32-LABEL: load7align4:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: lw $2, 0($4)
-; MIPS32-NEXT: lw $1, 4($4)
-; MIPS32-NEXT: addiu $4, $zero, 65535
-; MIPS32-NEXT: lui $3, 255
-; MIPS32-NEXT: ori $3, $3, 65535
-; MIPS32-NEXT: and $2, $2, $4
-; MIPS32-NEXT: and $3, $1, $3
+; MIPS32-NEXT: lw $1, 0($4)
+; MIPS32-NEXT: lw $2, 4($4)
+; MIPS32-NEXT: addiu $3, $zero, 65535
+; MIPS32-NEXT: lui $4, 255
+; MIPS32-NEXT: ori $4, $4, 65535
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: and $3, $2, $4
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load7align4:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lw $1, 4($4)
-; MIPS32R6-NEXT: addiu $4, $zero, 65535
-; MIPS32R6-NEXT: lui $3, 255
-; MIPS32R6-NEXT: ori $3, $3, 65535
-; MIPS32R6-NEXT: and $2, $2, $4
-; MIPS32R6-NEXT: and $3, $1, $3
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lw $2, 4($4)
+; MIPS32R6-NEXT: addiu $3, $zero, 65535
+; MIPS32R6-NEXT: lui $4, 255
+; MIPS32R6-NEXT: ori $4, $4, 65535
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: and $3, $2, $4
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize7_Align4* %S to i56*
@@ -447,25 +469,27 @@ entry:
define i64 @load7align8(%struct.MemSize7_Align8* %S) {
; MIPS32-LABEL: load7align8:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: lw $2, 0($4)
-; MIPS32-NEXT: lw $1, 4($4)
-; MIPS32-NEXT: addiu $4, $zero, 65535
-; MIPS32-NEXT: lui $3, 255
-; MIPS32-NEXT: ori $3, $3, 65535
-; MIPS32-NEXT: and $2, $2, $4
-; MIPS32-NEXT: and $3, $1, $3
+; MIPS32-NEXT: lw $1, 0($4)
+; MIPS32-NEXT: lw $2, 4($4)
+; MIPS32-NEXT: addiu $3, $zero, 65535
+; MIPS32-NEXT: lui $4, 255
+; MIPS32-NEXT: ori $4, $4, 65535
+; MIPS32-NEXT: and $1, $1, $3
+; MIPS32-NEXT: and $3, $2, $4
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: load7align8:
; MIPS32R6: # %bb.0: # %entry
-; MIPS32R6-NEXT: lw $2, 0($4)
-; MIPS32R6-NEXT: lw $1, 4($4)
-; MIPS32R6-NEXT: addiu $4, $zero, 65535
-; MIPS32R6-NEXT: lui $3, 255
-; MIPS32R6-NEXT: ori $3, $3, 65535
-; MIPS32R6-NEXT: and $2, $2, $4
-; MIPS32R6-NEXT: and $3, $1, $3
+; MIPS32R6-NEXT: lw $1, 0($4)
+; MIPS32R6-NEXT: lw $2, 4($4)
+; MIPS32R6-NEXT: addiu $3, $zero, 65535
+; MIPS32R6-NEXT: lui $4, 255
+; MIPS32R6-NEXT: ori $4, $4, 65535
+; MIPS32R6-NEXT: and $1, $1, $3
+; MIPS32R6-NEXT: and $3, $2, $4
+; MIPS32R6-NEXT: move $2, $1
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize7_Align8* %S to i56*
@@ -478,15 +502,15 @@ define double @load_double_align1() {
; MIPS32-LABEL: load_double_align1:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $1, %hi(double_align1)
-; MIPS32-NEXT: addiu $3, $1, %lo(double_align1)
-; MIPS32-NEXT: # implicit-def: $at
-; MIPS32-NEXT: lwl $1, 3($3)
-; MIPS32-NEXT: lwr $1, 0($3)
+; MIPS32-NEXT: addiu $1, $1, %lo(double_align1)
; MIPS32-NEXT: # implicit-def: $v0
-; MIPS32-NEXT: lwl $2, 7($3)
-; MIPS32-NEXT: lwr $2, 4($3)
-; MIPS32-NEXT: mtc1 $1, $f0
-; MIPS32-NEXT: mtc1 $2, $f1
+; MIPS32-NEXT: lwl $2, 3($1)
+; MIPS32-NEXT: lwr $2, 0($1)
+; MIPS32-NEXT: # implicit-def: $v1
+; MIPS32-NEXT: lwl $3, 7($1)
+; MIPS32-NEXT: lwr $3, 4($1)
+; MIPS32-NEXT: mtc1 $2, $f0
+; MIPS32-NEXT: mtc1 $3, $f1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
@@ -505,15 +529,15 @@ define double @load_double_align2() {
; MIPS32-LABEL: load_double_align2:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $1, %hi(double_align2)
-; MIPS32-NEXT: addiu $3, $1, %lo(double_align2)
-; MIPS32-NEXT: # implicit-def: $at
-; MIPS32-NEXT: lwl $1, 3($3)
-; MIPS32-NEXT: lwr $1, 0($3)
+; MIPS32-NEXT: addiu $1, $1, %lo(double_align2)
; MIPS32-NEXT: # implicit-def: $v0
-; MIPS32-NEXT: lwl $2, 7($3)
-; MIPS32-NEXT: lwr $2, 4($3)
-; MIPS32-NEXT: mtc1 $1, $f0
-; MIPS32-NEXT: mtc1 $2, $f1
+; MIPS32-NEXT: lwl $2, 3($1)
+; MIPS32-NEXT: lwr $2, 0($1)
+; MIPS32-NEXT: # implicit-def: $v1
+; MIPS32-NEXT: lwl $3, 7($1)
+; MIPS32-NEXT: lwr $3, 4($1)
+; MIPS32-NEXT: mtc1 $2, $f0
+; MIPS32-NEXT: mtc1 $3, $f1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
@@ -532,11 +556,11 @@ define double @load_double_align4() {
; MIPS32-LABEL: load_double_align4:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $1, %hi(double_align4)
-; MIPS32-NEXT: addiu $2, $1, %lo(double_align4)
-; MIPS32-NEXT: lw $1, 0($2)
-; MIPS32-NEXT: lw $2, 4($2)
-; MIPS32-NEXT: mtc1 $1, $f0
-; MIPS32-NEXT: mtc1 $2, $f1
+; MIPS32-NEXT: addiu $1, $1, %lo(double_align4)
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: lw $1, 4($1)
+; MIPS32-NEXT: mtc1 $2, $f0
+; MIPS32-NEXT: mtc1 $1, $f1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll
index ce46bed175d6..2dcc174860c1 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll
@@ -6,124 +6,126 @@ define void @long_chain_ambiguous_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32*
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -48
; MIPS32-NEXT: .cfi_def_cfa_offset 48
-; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $7, 32($sp) # 4-byte Folded Spill
; MIPS32-NEXT: addiu $1, $sp, 64
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 36($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 68
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 72
-; MIPS32-NEXT: lw $1, 0($1)
+; MIPS32-NEXT: addiu $2, $sp, 68
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 72
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: andi $8, $4, 1
; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: bnez $1, $BB0_12
+; MIPS32-NEXT: sw $4, 40($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $8, $BB0_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB0_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_2: # %pre.PHI.1
-; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB0_7
+; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB0_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB0_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0
-; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB0_8
+; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB0_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB0_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0
-; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB0_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1
-; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB0_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2
-; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB0_9: # %b.PHI.1
-; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB0_11
+; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $3, $2, 1
+; MIPS32-NEXT: move $4, $1
+; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $3, $BB0_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB0_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end
-; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 48
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_12: # %pre.PHI.2
-; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB0_14
+; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB0_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB0_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0
-; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB0_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1
-; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB0_16: # %b.PHI.2
-; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB0_19
+; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $3, $2, 1
+; MIPS32-NEXT: move $4, $1
+; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $3, $BB0_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB0_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end
; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 48
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_19: # %b.PHI.3
-; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $5, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: move $4, $1
-; MIPS32-NEXT: andi $5, $5, 1
-; MIPS32-NEXT: movn $4, $1, $5
-; MIPS32-NEXT: andi $5, $3, 1
-; MIPS32-NEXT: move $3, $1
-; MIPS32-NEXT: movn $3, $4, $5
-; MIPS32-NEXT: sw $3, 0($2)
-; MIPS32-NEXT: sw $1, 0($2)
+; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $4, $3, 1
+; MIPS32-NEXT: movn $1, $2, $4
+; MIPS32-NEXT: lw $4, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $5, $4, 1
+; MIPS32-NEXT: move $6, $2
+; MIPS32-NEXT: movn $6, $1, $5
+; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $6, 0($1)
+; MIPS32-NEXT: sw $2, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 48
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -191,130 +193,132 @@ define void @long_chain_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* %a, i32* %
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -56
; MIPS32-NEXT: .cfi_def_cfa_offset 56
-; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill
; MIPS32-NEXT: addiu $1, $sp, 72
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 76
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 80
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 48($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: ori $1, $zero, 0
+; MIPS32-NEXT: addiu $2, $sp, 76
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 80
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: ori $8, $zero, 0
+; MIPS32-NEXT: andi $9, $4, 1
; MIPS32-NEXT: sw $1, 52($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: bnez $1, $BB1_12
+; MIPS32-NEXT: sw $4, 48($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 40($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $8, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $9, $BB1_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB1_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_2: # %pre.PHI.1
-; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB1_7
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB1_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB1_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0
-; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB1_8
+; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB1_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB1_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0
; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1
-; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2
-; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: $BB1_9: # %b.PHI.1
-; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: sw $3, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB1_11
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: $BB1_9: # %b.PHI.1
+; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $3, $2, 1
+; MIPS32-NEXT: move $4, $1
+; MIPS32-NEXT: lw $5, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $3, $BB1_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB1_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end
-; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 56
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_12: # %pre.PHI.2
-; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB1_14
+; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB1_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB1_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0
; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1
-; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB1_16: # %b.PHI.2
-; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: move $3, $2
-; MIPS32-NEXT: sw $3, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB1_19
+; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $3, $2, 1
+; MIPS32-NEXT: move $4, $1
+; MIPS32-NEXT: move $5, $1
+; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $3, $BB1_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB1_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end
; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 56
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_19: # %b.PHI.3
-; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $5, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $5, $5, 1
-; MIPS32-NEXT: movn $4, $1, $5
-; MIPS32-NEXT: andi $5, $3, 1
-; MIPS32-NEXT: move $3, $1
-; MIPS32-NEXT: movn $3, $4, $5
-; MIPS32-NEXT: sw $3, 0($2)
-; MIPS32-NEXT: sw $1, 0($2)
+; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $3, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $4, $3, 1
+; MIPS32-NEXT: movn $1, $2, $4
+; MIPS32-NEXT: lw $4, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $5, $4, 1
+; MIPS32-NEXT: move $6, $2
+; MIPS32-NEXT: movn $6, $1, $5
+; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $6, 0($1)
+; MIPS32-NEXT: sw $2, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 56
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -381,124 +385,126 @@ define void @long_chain_ambiguous_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, flo
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -48
; MIPS32-NEXT: .cfi_def_cfa_offset 48
-; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $7, 32($sp) # 4-byte Folded Spill
; MIPS32-NEXT: addiu $1, $sp, 64
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 36($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 68
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 72
-; MIPS32-NEXT: lw $1, 0($1)
+; MIPS32-NEXT: addiu $2, $sp, 68
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 72
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: andi $8, $4, 1
; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: bnez $1, $BB2_12
+; MIPS32-NEXT: sw $4, 40($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $8, $BB2_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB2_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_2: # %pre.PHI.1
-; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB2_7
+; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB2_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB2_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0
-; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB2_8
+; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB2_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB2_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0
-; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB2_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1
-; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB2_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2
-; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB2_9: # %b.PHI.1
-; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB2_11
+; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $3, $2, 1
+; MIPS32-NEXT: move $4, $1
+; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $3, $BB2_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB2_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end
-; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 48
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_12: # %pre.PHI.2
-; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB2_14
+; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB2_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB2_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0
-; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB2_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1
-; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 0($1)
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB2_16: # %b.PHI.2
-; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB2_19
+; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $3, $2, 1
+; MIPS32-NEXT: move $4, $1
+; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $3, $BB2_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB2_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end
; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 48
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_19: # %b.PHI.3
-; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $5, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: move $4, $1
-; MIPS32-NEXT: andi $5, $5, 1
-; MIPS32-NEXT: movn $4, $1, $5
-; MIPS32-NEXT: andi $5, $3, 1
-; MIPS32-NEXT: move $3, $1
-; MIPS32-NEXT: movn $3, $4, $5
-; MIPS32-NEXT: sw $3, 0($2)
-; MIPS32-NEXT: sw $1, 0($2)
+; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $4, $3, 1
+; MIPS32-NEXT: movn $1, $2, $4
+; MIPS32-NEXT: lw $4, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $5, $4, 1
+; MIPS32-NEXT: move $6, $2
+; MIPS32-NEXT: movn $6, $1, $5
+; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $6, 0($1)
+; MIPS32-NEXT: sw $2, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 48
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -566,40 +572,40 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -56
; MIPS32-NEXT: .cfi_def_cfa_offset 56
-; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill
; MIPS32-NEXT: addiu $1, $sp, 72
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 76
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 80
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 48($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: ori $1, $zero, 0
-; MIPS32-NEXT: mtc1 $1, $f0
-; MIPS32-NEXT: swc1 $f0, 52($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: bnez $1, $BB3_12
+; MIPS32-NEXT: addiu $2, $sp, 76
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 80
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: ori $8, $zero, 0
+; MIPS32-NEXT: mtc1 $8, $f0
+; MIPS32-NEXT: andi $8, $4, 1
+; MIPS32-NEXT: sw $1, 52($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 48($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 40($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: swc1 $f0, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $8, $BB3_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB3_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_2: # %pre.PHI.1
-; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB3_7
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB3_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB3_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0
-; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB3_8
+; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB3_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB3_6
@@ -611,39 +617,40 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl
; MIPS32-NEXT: j $BB3_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1
-; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lwc1 $f0, 0($1)
; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB3_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2
-; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lwc1 $f0, 0($1)
; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB3_9: # %b.PHI.1
-; MIPS32-NEXT: lwc1 $f0, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lwc1 $f1, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: swc1 $f1, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lwc1 $f0, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: mov.s $f1, $f0
+; MIPS32-NEXT: lwc1 $f2, 24($sp) # 4-byte Folded Reload
; MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB3_11
+; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $2, $BB3_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB3_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end
-; MIPS32-NEXT: lwc1 $f0, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lwc1 $f0, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: swc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 56
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_12: # %pre.PHI.2
-; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB3_14
+; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB3_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB3_15
@@ -655,42 +662,43 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl
; MIPS32-NEXT: j $BB3_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1
-; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lwc1 $f0, 0($1)
; MIPS32-NEXT: swc1 $f0, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB3_16: # %b.PHI.2
-; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lwc1 $f0, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: swc1 $f0, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: mov.s $f1, $f0
+; MIPS32-NEXT: mov.s $f2, $f0
+; MIPS32-NEXT: swc1 $f0, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB3_19
+; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $2, $BB3_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB3_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end
; MIPS32-NEXT: lwc1 $f0, 0($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: swc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 56
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_19: # %b.PHI.3
-; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lwc1 $f0, 12($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lwc1 $f2, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $3, $3, 1
+; MIPS32-NEXT: lwc1 $f0, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lwc1 $f1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: movn.s $f0, $f1, $2
+; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $3, $2, 1
+; MIPS32-NEXT: mov.s $f2, $f1
; MIPS32-NEXT: movn.s $f2, $f0, $3
-; MIPS32-NEXT: andi $2, $2, 1
-; MIPS32-NEXT: mov.s $f1, $f0
-; MIPS32-NEXT: movn.s $f1, $f2, $2
-; MIPS32-NEXT: swc1 $f1, 0($1)
-; MIPS32-NEXT: swc1 $f0, 0($1)
+; MIPS32-NEXT: lw $3, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: swc1 $f2, 0($3)
+; MIPS32-NEXT: swc1 $f1, 0($3)
; MIPS32-NEXT: addiu $sp, $sp, 56
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll
index 2a5afd5b1022..bafa309df76a 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll
@@ -6,124 +6,126 @@ define void @long_chain_ambiguous_i64_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64*
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -72
; MIPS32-NEXT: .cfi_def_cfa_offset 72
-; MIPS32-NEXT: sw $4, 44($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 48($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 52($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $7, 56($sp) # 4-byte Folded Spill
; MIPS32-NEXT: addiu $1, $sp, 88
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 60($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 92
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 64($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 96
-; MIPS32-NEXT: lw $1, 0($1)
+; MIPS32-NEXT: addiu $2, $sp, 92
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 96
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: andi $8, $4, 1
; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: bnez $1, $BB0_12
+; MIPS32-NEXT: sw $4, 64($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 60($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 56($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $8, $BB0_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB0_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_2: # %pre.PHI.1
-; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB0_7
+; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB0_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB0_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0
-; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB0_8
+; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB0_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB0_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0
-; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB0_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1
-; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB0_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2
-; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB0_9: # %b.PHI.1
-; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: sdc1 $f0, 16($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: mov.d $f2, $f0
; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB0_11
+; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: bnez $2, $BB0_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB0_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end
-; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 72
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_12: # %pre.PHI.2
-; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB0_14
+; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB0_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB0_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0
-; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB0_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1
-; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB0_16: # %b.PHI.2
-; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: mov.d $f2, $f0
; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB0_19
+; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: bnez $2, $BB0_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB0_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end
; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 72
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_19: # %b.PHI.3
-; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: mov.d $f4, $f0
-; MIPS32-NEXT: andi $3, $3, 1
+; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: movn.d $f0, $f2, $2
+; MIPS32-NEXT: lw $2, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $3, $2, 1
+; MIPS32-NEXT: mov.d $f4, $f2
; MIPS32-NEXT: movn.d $f4, $f0, $3
-; MIPS32-NEXT: andi $2, $2, 1
-; MIPS32-NEXT: mov.d $f2, $f0
-; MIPS32-NEXT: movn.d $f2, $f4, $2
-; MIPS32-NEXT: sdc1 $f2, 0($1)
-; MIPS32-NEXT: sdc1 $f0, 0($1)
+; MIPS32-NEXT: lw $3, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sdc1 $f4, 0($3)
+; MIPS32-NEXT: sdc1 $f2, 0($3)
; MIPS32-NEXT: addiu $sp, $sp, 72
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -191,39 +193,39 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* %
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -80
; MIPS32-NEXT: .cfi_def_cfa_offset 80
-; MIPS32-NEXT: sw $4, 48($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 52($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 56($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $7, 60($sp) # 4-byte Folded Spill
; MIPS32-NEXT: addiu $1, $sp, 96
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 64($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 100
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 104
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 72($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: ori $1, $zero, 0
+; MIPS32-NEXT: addiu $2, $sp, 100
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 104
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: ori $8, $zero, 0
+; MIPS32-NEXT: andi $9, $4, 1
; MIPS32-NEXT: sw $1, 76($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: bnez $1, $BB1_12
+; MIPS32-NEXT: sw $4, 72($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 68($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 64($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 60($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 56($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 52($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $8, 48($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $9, $BB1_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB1_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_2: # %pre.PHI.1
-; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB1_7
+; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB1_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB1_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0
-; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB1_8
+; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB1_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB1_6
@@ -231,56 +233,58 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* %
; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0
; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
-; MIPS32-NEXT: lw $1, 4($1)
-; MIPS32-NEXT: sw $2, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $3, 4($1)
+; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1
-; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
-; MIPS32-NEXT: lw $1, 4($1)
-; MIPS32-NEXT: sw $2, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $3, 4($1)
+; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2
-; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
-; MIPS32-NEXT: lw $1, 4($1)
-; MIPS32-NEXT: sw $2, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $3, 4($1)
+; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB1_9: # %b.PHI.1
-; MIPS32-NEXT: lw $2, 76($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $4, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $3, 16($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: move $3, $2
-; MIPS32-NEXT: sw $3, 32($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $2, 36($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB1_11
+; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $3, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $4, $3, 1
+; MIPS32-NEXT: move $5, $2
+; MIPS32-NEXT: move $6, $1
+; MIPS32-NEXT: lw $7, 48($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $8, 48($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $1, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $4, $BB1_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB1_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end
-; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 72($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $3, 0($2)
-; MIPS32-NEXT: sw $1, 4($2)
+; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $1, 0($2)
+; MIPS32-NEXT: lw $3, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $3, 4($2)
; MIPS32-NEXT: addiu $sp, $sp, 80
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_12: # %pre.PHI.2
-; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB1_14
+; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB1_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB1_15
@@ -288,64 +292,66 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* %
; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0
; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
-; MIPS32-NEXT: lw $1, 4($1)
-; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $3, 4($1)
+; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1
-; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
-; MIPS32-NEXT: lw $1, 4($1)
-; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $3, 4($1)
+; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB1_16: # %b.PHI.2
-; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $3, 68($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $4, $3, 1
+; MIPS32-NEXT: move $5, $2
+; MIPS32-NEXT: move $6, $1
+; MIPS32-NEXT: move $7, $2
+; MIPS32-NEXT: move $8, $1
+; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $3, 4($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: move $4, $3
-; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: move $4, $2
-; MIPS32-NEXT: sw $4, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $3, 32($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $2, 36($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB1_19
+; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $4, $BB1_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB1_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end
; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 72($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $3, 0($2)
-; MIPS32-NEXT: sw $1, 4($2)
+; MIPS32-NEXT: sw $3, 4($2)
; MIPS32-NEXT: addiu $sp, $sp, 80
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_19: # %b.PHI.3
-; MIPS32-NEXT: lw $2, 72($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $5, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $7, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $4, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $6, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $7, $7, 1
-; MIPS32-NEXT: movn $4, $3, $7
-; MIPS32-NEXT: movn $6, $1, $7
-; MIPS32-NEXT: andi $7, $5, 1
-; MIPS32-NEXT: move $5, $3
-; MIPS32-NEXT: movn $5, $4, $7
-; MIPS32-NEXT: move $4, $1
-; MIPS32-NEXT: movn $4, $6, $7
-; MIPS32-NEXT: sw $5, 0($2)
-; MIPS32-NEXT: sw $4, 4($2)
-; MIPS32-NEXT: sw $3, 0($2)
-; MIPS32-NEXT: sw $1, 4($2)
+; MIPS32-NEXT: lw $4, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $5, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $6, $5, 1
+; MIPS32-NEXT: movn $2, $4, $6
+; MIPS32-NEXT: movn $1, $3, $6
+; MIPS32-NEXT: lw $6, 68($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $7, $6, 1
+; MIPS32-NEXT: move $8, $4
+; MIPS32-NEXT: movn $8, $2, $7
+; MIPS32-NEXT: move $2, $3
+; MIPS32-NEXT: movn $2, $1, $7
+; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $8, 0($1)
+; MIPS32-NEXT: sw $2, 4($1)
+; MIPS32-NEXT: sw $4, 0($1)
+; MIPS32-NEXT: sw $3, 4($1)
; MIPS32-NEXT: addiu $sp, $sp, 80
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -412,124 +418,126 @@ define void @long_chain_ambiguous_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, do
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -72
; MIPS32-NEXT: .cfi_def_cfa_offset 72
-; MIPS32-NEXT: sw $4, 44($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 48($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 52($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $7, 56($sp) # 4-byte Folded Spill
; MIPS32-NEXT: addiu $1, $sp, 88
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 60($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 92
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 64($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 96
-; MIPS32-NEXT: lw $1, 0($1)
+; MIPS32-NEXT: addiu $2, $sp, 92
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 96
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: andi $8, $4, 1
; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: bnez $1, $BB2_12
+; MIPS32-NEXT: sw $4, 64($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 60($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 56($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $8, $BB2_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB2_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_2: # %pre.PHI.1
-; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB2_7
+; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB2_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB2_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0
-; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB2_8
+; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB2_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB2_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0
-; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB2_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1
-; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB2_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2
-; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB2_9: # %b.PHI.1
-; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: sdc1 $f0, 16($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: mov.d $f2, $f0
; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB2_11
+; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: bnez $2, $BB2_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB2_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end
-; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 72
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_12: # %pre.PHI.2
-; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB2_14
+; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB2_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB2_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0
-; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB2_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1
-; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB2_16: # %b.PHI.2
-; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: mov.d $f2, $f0
; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB2_19
+; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: bnez $2, $BB2_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB2_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end
; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 72
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_19: # %b.PHI.3
-; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: mov.d $f4, $f0
-; MIPS32-NEXT: andi $3, $3, 1
+; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: movn.d $f0, $f2, $2
+; MIPS32-NEXT: lw $2, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $3, $2, 1
+; MIPS32-NEXT: mov.d $f4, $f2
; MIPS32-NEXT: movn.d $f4, $f0, $3
-; MIPS32-NEXT: andi $2, $2, 1
-; MIPS32-NEXT: mov.d $f2, $f0
-; MIPS32-NEXT: movn.d $f2, $f4, $2
-; MIPS32-NEXT: sdc1 $f2, 0($1)
-; MIPS32-NEXT: sdc1 $f0, 0($1)
+; MIPS32-NEXT: lw $3, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sdc1 $f4, 0($3)
+; MIPS32-NEXT: sdc1 $f2, 0($3)
; MIPS32-NEXT: addiu $sp, $sp, 72
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -597,133 +605,135 @@ define void @long_chain_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, double* %a,
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -88
; MIPS32-NEXT: .cfi_def_cfa_offset 88
-; MIPS32-NEXT: sw $4, 52($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 56($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 60($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $7, 64($sp) # 4-byte Folded Spill
; MIPS32-NEXT: addiu $1, $sp, 104
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 108
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 72($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 112
-; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 76($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: ori $2, $zero, 0
-; MIPS32-NEXT: ori $1, $zero, 0
-; MIPS32-NEXT: mtc1 $1, $f0
-; MIPS32-NEXT: mtc1 $2, $f1
-; MIPS32-NEXT: sdc1 $f0, 80($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: bnez $1, $BB3_12
+; MIPS32-NEXT: addiu $2, $sp, 108
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 112
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: ori $8, $zero, 0
+; MIPS32-NEXT: ori $9, $zero, 0
+; MIPS32-NEXT: mtc1 $9, $f0
+; MIPS32-NEXT: mtc1 $8, $f1
+; MIPS32-NEXT: andi $8, $4, 1
+; MIPS32-NEXT: sw $1, 84($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 80($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $5, 76($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 72($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 68($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 64($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $3, 60($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sdc1 $f0, 48($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: bnez $8, $BB3_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB3_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_2: # %pre.PHI.1
-; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB3_7
+; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB3_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB3_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0
-; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB3_8
+; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB3_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB3_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_6: # %b.PHI.1.0
-; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB3_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1
-; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB3_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2
-; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB3_9: # %b.PHI.1
-; MIPS32-NEXT: ldc1 $f0, 80($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: ldc1 $f2, 40($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: ldc1 $f0, 40($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: mov.d $f2, $f0
+; MIPS32-NEXT: ldc1 $f4, 48($sp) # 8-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB3_11
+; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: bnez $2, $BB3_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB3_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end
-; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 88
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_12: # %pre.PHI.2
-; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: bnez $1, $BB3_14
+; MIPS32-NEXT: lw $1, 80($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: bnez $2, $BB3_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB3_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_14: # %b.PHI.2.0
-; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB3_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1
-; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB3_16: # %b.PHI.2
-; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: mov.d $f2, $f0
+; MIPS32-NEXT: mov.d $f4, $f0
+; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill
; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: bnez $1, $BB3_19
+; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: bnez $2, $BB3_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB3_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end
; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 88
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_19: # %b.PHI.3
-; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 56($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 60($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: ldc1 $f4, 32($sp) # 8-byte Folded Reload
-; MIPS32-NEXT: andi $3, $3, 1
+; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: ldc1 $f2, 24($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $2, $1, 1
+; MIPS32-NEXT: movn.d $f0, $f2, $2
+; MIPS32-NEXT: lw $2, 76($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: andi $3, $2, 1
+; MIPS32-NEXT: mov.d $f4, $f2
; MIPS32-NEXT: movn.d $f4, $f0, $3
-; MIPS32-NEXT: andi $2, $2, 1
-; MIPS32-NEXT: mov.d $f2, $f0
-; MIPS32-NEXT: movn.d $f2, $f4, $2
-; MIPS32-NEXT: sdc1 $f2, 0($1)
-; MIPS32-NEXT: sdc1 $f0, 0($1)
+; MIPS32-NEXT: lw $3, 60($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sdc1 $f4, 0($3)
+; MIPS32-NEXT: sdc1 $f2, 0($3)
; MIPS32-NEXT: addiu $sp, $sp, 88
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
index 2174db4bdd24..659eadf181c0 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
@@ -86,14 +86,13 @@ entry:
define i64 @mul_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: mul_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $3, $4
-; MIPS32-NEXT: mul $2, $6, $3
-; MIPS32-NEXT: mul $1, $7, $3
-; MIPS32-NEXT: mul $4, $6, $5
-; MIPS32-NEXT: multu $6, $3
-; MIPS32-NEXT: mfhi $3
-; MIPS32-NEXT: addu $1, $1, $4
-; MIPS32-NEXT: addu $3, $1, $3
+; MIPS32-NEXT: mul $2, $6, $4
+; MIPS32-NEXT: mul $1, $7, $4
+; MIPS32-NEXT: mul $3, $6, $5
+; MIPS32-NEXT: multu $6, $4
+; MIPS32-NEXT: mfhi $4
+; MIPS32-NEXT: addu $1, $1, $3
+; MIPS32-NEXT: addu $3, $1, $4
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -104,73 +103,72 @@ entry:
define i128 @mul_i128(i128 %a, i128 %b) {
; MIPS32-LABEL: mul_i128:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $14, $4
-; MIPS32-NEXT: move $13, $5
-; MIPS32-NEXT: move $12, $6
-; MIPS32-NEXT: move $9, $7
; MIPS32-NEXT: addiu $1, $sp, 16
-; MIPS32-NEXT: lw $6, 0($1)
-; MIPS32-NEXT: addiu $1, $sp, 20
-; MIPS32-NEXT: lw $7, 0($1)
-; MIPS32-NEXT: addiu $1, $sp, 24
-; MIPS32-NEXT: lw $8, 0($1)
-; MIPS32-NEXT: addiu $1, $sp, 28
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: mul $2, $6, $14
-; MIPS32-NEXT: mul $3, $7, $14
-; MIPS32-NEXT: mul $4, $6, $13
-; MIPS32-NEXT: multu $6, $14
-; MIPS32-NEXT: mfhi $5
-; MIPS32-NEXT: addu $3, $3, $4
-; MIPS32-NEXT: sltu $4, $3, $4
-; MIPS32-NEXT: andi $4, $4, 1
-; MIPS32-NEXT: addu $3, $3, $5
-; MIPS32-NEXT: sltu $5, $3, $5
-; MIPS32-NEXT: andi $5, $5, 1
-; MIPS32-NEXT: addu $10, $4, $5
-; MIPS32-NEXT: mul $4, $8, $14
-; MIPS32-NEXT: mul $5, $7, $13
-; MIPS32-NEXT: mul $24, $6, $12
-; MIPS32-NEXT: multu $7, $14
+; MIPS32-NEXT: addiu $2, $sp, 20
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 24
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: addiu $8, $sp, 28
+; MIPS32-NEXT: lw $8, 0($8)
+; MIPS32-NEXT: mul $9, $1, $4
+; MIPS32-NEXT: mul $10, $2, $4
+; MIPS32-NEXT: mul $11, $1, $5
+; MIPS32-NEXT: multu $1, $4
+; MIPS32-NEXT: mfhi $12
+; MIPS32-NEXT: addu $10, $10, $11
+; MIPS32-NEXT: sltu $11, $10, $11
+; MIPS32-NEXT: andi $11, $11, 1
+; MIPS32-NEXT: addu $10, $10, $12
+; MIPS32-NEXT: sltu $12, $10, $12
+; MIPS32-NEXT: andi $12, $12, 1
+; MIPS32-NEXT: addu $11, $11, $12
+; MIPS32-NEXT: mul $12, $3, $4
+; MIPS32-NEXT: mul $13, $2, $5
+; MIPS32-NEXT: mul $14, $1, $6
+; MIPS32-NEXT: multu $2, $4
; MIPS32-NEXT: mfhi $15
-; MIPS32-NEXT: multu $6, $13
-; MIPS32-NEXT: mfhi $11
-; MIPS32-NEXT: addu $4, $4, $5
-; MIPS32-NEXT: sltu $5, $4, $5
-; MIPS32-NEXT: andi $5, $5, 1
-; MIPS32-NEXT: addu $4, $4, $24
-; MIPS32-NEXT: sltu $24, $4, $24
-; MIPS32-NEXT: andi $24, $24, 1
-; MIPS32-NEXT: addu $5, $5, $24
-; MIPS32-NEXT: addu $4, $4, $15
-; MIPS32-NEXT: sltu $15, $4, $15
-; MIPS32-NEXT: andi $15, $15, 1
-; MIPS32-NEXT: addu $5, $5, $15
-; MIPS32-NEXT: addu $4, $4, $11
-; MIPS32-NEXT: sltu $11, $4, $11
+; MIPS32-NEXT: multu $1, $5
+; MIPS32-NEXT: mfhi $24
+; MIPS32-NEXT: addu $12, $12, $13
+; MIPS32-NEXT: sltu $13, $12, $13
+; MIPS32-NEXT: andi $13, $13, 1
+; MIPS32-NEXT: addu $12, $12, $14
+; MIPS32-NEXT: sltu $14, $12, $14
+; MIPS32-NEXT: andi $14, $14, 1
+; MIPS32-NEXT: addu $13, $13, $14
+; MIPS32-NEXT: addu $12, $12, $15
+; MIPS32-NEXT: sltu $14, $12, $15
+; MIPS32-NEXT: andi $14, $14, 1
+; MIPS32-NEXT: addu $13, $13, $14
+; MIPS32-NEXT: addu $12, $12, $24
+; MIPS32-NEXT: sltu $14, $12, $24
+; MIPS32-NEXT: andi $14, $14, 1
+; MIPS32-NEXT: addu $13, $13, $14
+; MIPS32-NEXT: addu $12, $12, $11
+; MIPS32-NEXT: sltu $11, $12, $11
; MIPS32-NEXT: andi $11, $11, 1
-; MIPS32-NEXT: addu $5, $5, $11
-; MIPS32-NEXT: addu $4, $4, $10
-; MIPS32-NEXT: sltu $10, $4, $10
-; MIPS32-NEXT: andi $10, $10, 1
-; MIPS32-NEXT: addu $5, $5, $10
-; MIPS32-NEXT: mul $1, $1, $14
-; MIPS32-NEXT: mul $11, $8, $13
-; MIPS32-NEXT: mul $10, $7, $12
-; MIPS32-NEXT: mul $9, $6, $9
-; MIPS32-NEXT: multu $8, $14
-; MIPS32-NEXT: mfhi $8
-; MIPS32-NEXT: multu $7, $13
-; MIPS32-NEXT: mfhi $7
-; MIPS32-NEXT: multu $6, $12
-; MIPS32-NEXT: mfhi $6
-; MIPS32-NEXT: addu $1, $1, $11
-; MIPS32-NEXT: addu $1, $1, $10
-; MIPS32-NEXT: addu $1, $1, $9
-; MIPS32-NEXT: addu $1, $1, $8
-; MIPS32-NEXT: addu $1, $1, $7
-; MIPS32-NEXT: addu $1, $1, $6
-; MIPS32-NEXT: addu $5, $1, $5
+; MIPS32-NEXT: addu $11, $13, $11
+; MIPS32-NEXT: mul $8, $8, $4
+; MIPS32-NEXT: mul $13, $3, $5
+; MIPS32-NEXT: mul $14, $2, $6
+; MIPS32-NEXT: mul $7, $1, $7
+; MIPS32-NEXT: multu $3, $4
+; MIPS32-NEXT: mfhi $3
+; MIPS32-NEXT: multu $2, $5
+; MIPS32-NEXT: mfhi $2
+; MIPS32-NEXT: multu $1, $6
+; MIPS32-NEXT: mfhi $1
+; MIPS32-NEXT: addu $4, $8, $13
+; MIPS32-NEXT: addu $4, $4, $14
+; MIPS32-NEXT: addu $4, $4, $7
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: addu $2, $3, $2
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: addu $5, $1, $11
+; MIPS32-NEXT: move $2, $9
+; MIPS32-NEXT: move $3, $10
+; MIPS32-NEXT: move $4, $12
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll
index a71e75958cdd..d3f085c239fd 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll
@@ -4,9 +4,9 @@
define void @mul_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %c) {
; P5600-LABEL: mul_v16i8:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.b $w1, 0($4)
-; P5600-NEXT: ld.b $w0, 0($5)
-; P5600-NEXT: mulv.b $w0, $w0, $w1
+; P5600-NEXT: ld.b $w0, 0($4)
+; P5600-NEXT: ld.b $w1, 0($5)
+; P5600-NEXT: mulv.b $w0, $w1, $w0
; P5600-NEXT: st.b $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
@@ -21,9 +21,9 @@ entry:
define void @mul_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c) {
; P5600-LABEL: mul_v8i16:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.h $w1, 0($4)
-; P5600-NEXT: ld.h $w0, 0($5)
-; P5600-NEXT: mulv.h $w0, $w0, $w1
+; P5600-NEXT: ld.h $w0, 0($4)
+; P5600-NEXT: ld.h $w1, 0($5)
+; P5600-NEXT: mulv.h $w0, $w1, $w0
; P5600-NEXT: st.h $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
@@ -38,9 +38,9 @@ entry:
define void @mul_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c) {
; P5600-LABEL: mul_v4i32:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.w $w1, 0($4)
-; P5600-NEXT: ld.w $w0, 0($5)
-; P5600-NEXT: mulv.w $w0, $w0, $w1
+; P5600-NEXT: ld.w $w0, 0($4)
+; P5600-NEXT: ld.w $w1, 0($5)
+; P5600-NEXT: mulv.w $w0, $w1, $w0
; P5600-NEXT: st.w $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
@@ -55,9 +55,9 @@ entry:
define void @mul_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i64>* %c) {
; P5600-LABEL: mul_v2i64:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.d $w1, 0($4)
-; P5600-NEXT: ld.d $w0, 0($5)
-; P5600-NEXT: mulv.d $w0, $w0, $w1
+; P5600-NEXT: ld.d $w0, 0($4)
+; P5600-NEXT: ld.d $w1, 0($5)
+; P5600-NEXT: mulv.d $w0, $w1, $w0
; P5600-NEXT: st.d $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll
index d44023bf7f0c..410c53f98751 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll
@@ -6,24 +6,25 @@ define i1 @phi_i1(i1 %cnd, i1 %a, i1 %b) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -16
; MIPS32-NEXT: .cfi_def_cfa_offset 16
-; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: andi $1, $4, 1
+; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $1, $BB0_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB0_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_2: # %cond.true
-; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB0_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_3: # %cond.false
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB0_4: # %cond.end
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: addiu $sp, $sp, 16
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -46,24 +47,25 @@ define i8 @phi_i8(i1 %cnd, i8 %a, i8 %b) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -16
; MIPS32-NEXT: .cfi_def_cfa_offset 16
-; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: andi $1, $4, 1
+; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $1, $BB1_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB1_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_2: # %cond.true
-; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_3: # %cond.false
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB1_4: # %cond.end
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: addiu $sp, $sp, 16
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -86,24 +88,25 @@ define i16 @phi_i16(i1 %cnd, i16 %a, i16 %b) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -16
; MIPS32-NEXT: .cfi_def_cfa_offset 16
-; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: andi $1, $4, 1
+; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $1, $BB2_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB2_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_2: # %cond.true
-; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB2_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_3: # %cond.false
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB2_4: # %cond.end
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: addiu $sp, $sp, 16
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -126,24 +129,25 @@ define i32 @phi_i32(i1 %cnd, i32 %a, i32 %b) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -16
; MIPS32-NEXT: .cfi_def_cfa_offset 16
-; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: andi $1, $4, 1
+; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $1, $BB3_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB3_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_2: # %cond.true
-; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB3_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_3: # %cond.false
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB3_4: # %cond.end
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: addiu $sp, $sp, 16
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -166,35 +170,36 @@ define i64 @phi_i64(i1 %cnd, i64 %a, i64 %b) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -24
; MIPS32-NEXT: .cfi_def_cfa_offset 24
-; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: addiu $1, $sp, 40
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: addiu $1, $sp, 44
-; MIPS32-NEXT: lw $1, 0($1)
+; MIPS32-NEXT: addiu $2, $sp, 44
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: andi $3, $4, 1
; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: bnez $1, $BB4_2
+; MIPS32-NEXT: sw $6, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $3, $BB4_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB4_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB4_2: # %cond.true
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB4_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB4_3: # %cond.false
; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB4_4: # %cond.end
-; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: move $3, $1
; MIPS32-NEXT: addiu $sp, $sp, 24
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
@@ -217,12 +222,12 @@ define void @phi_ambiguous_i64_in_fpr(i1 %cnd, i64* %i64_ptr_a, i64* %i64_ptr_b,
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -32
; MIPS32-NEXT: .cfi_def_cfa_offset 32
-; MIPS32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: ldc1 $f0, 0($5)
-; MIPS32-NEXT: sdc1 $f0, 16($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: ldc1 $f0, 0($6)
-; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: ldc1 $f2, 0($6)
; MIPS32-NEXT: andi $1, $4, 1
+; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sdc1 $f0, 16($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: sdc1 $f2, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: bnez $1, $BB5_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
@@ -234,11 +239,11 @@ define void @phi_ambiguous_i64_in_fpr(i1 %cnd, i64* %i64_ptr_a, i64* %i64_ptr_b,
; MIPS32-NEXT: j $BB5_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB5_3: # %cond.false
-; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB5_4: # %cond.end
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 32
; MIPS32-NEXT: jr $ra
@@ -265,21 +270,21 @@ define float @phi_float(i1 %cnd, float %a, float %b) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -16
; MIPS32-NEXT: .cfi_def_cfa_offset 16
-; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: andi $1, $4, 1
+; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $1, $BB6_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB6_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB6_2: # %cond.true
-; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB6_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB6_3: # %cond.false
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB6_4: # %cond.end
; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
@@ -306,28 +311,28 @@ define void @phi_ambiguous_float_in_gpr(i1 %cnd, float* %f32_ptr_a, float* %f32_
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -16
; MIPS32-NEXT: .cfi_def_cfa_offset 16
-; MIPS32-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: lw $1, 0($5)
-; MIPS32-NEXT: sw $1, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: lw $1, 0($6)
+; MIPS32-NEXT: lw $2, 0($6)
+; MIPS32-NEXT: andi $3, $4, 1
; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: bnez $1, $BB7_2
+; MIPS32-NEXT: sw $7, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: bnez $3, $BB7_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB7_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB7_2: # %cond.true
-; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB7_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB7_3: # %cond.false
-; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB7_4: # %cond.end
-; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 16
; MIPS32-NEXT: jr $ra
@@ -354,23 +359,23 @@ define double @phi_double(double %a, double %b, i1 %cnd) {
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $sp, $sp, -24
; MIPS32-NEXT: .cfi_def_cfa_offset 24
-; MIPS32-NEXT: sdc1 $f12, 8($sp) # 8-byte Folded Spill
-; MIPS32-NEXT: sdc1 $f14, 16($sp) # 8-byte Folded Spill
; MIPS32-NEXT: addiu $1, $sp, 40
; MIPS32-NEXT: lw $1, 0($1)
; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: sdc1 $f12, 16($sp) # 8-byte Folded Spill
+; MIPS32-NEXT: sdc1 $f14, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: bnez $1, $BB8_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB8_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB8_2: # %cond.true
-; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB8_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB8_3: # %cond.false
-; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
+; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB8_4: # %cond.end
; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll
index f60bd998b7c8..d2520daf6f26 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll
@@ -6,11 +6,11 @@ define signext i8 @sdiv_i8(i8 signext %a, i8 signext %b) {
; MIPS32-LABEL: sdiv_i8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: sll $1, $5, 24
-; MIPS32-NEXT: sra $2, $1, 24
-; MIPS32-NEXT: sll $1, $4, 24
; MIPS32-NEXT: sra $1, $1, 24
-; MIPS32-NEXT: div $zero, $2, $1
-; MIPS32-NEXT: teq $1, $zero, 7
+; MIPS32-NEXT: sll $2, $4, 24
+; MIPS32-NEXT: sra $2, $2, 24
+; MIPS32-NEXT: div $zero, $1, $2
+; MIPS32-NEXT: teq $2, $zero, 7
; MIPS32-NEXT: mflo $1
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: sra $2, $1, 24
@@ -25,11 +25,11 @@ define signext i16 @sdiv_i16(i16 signext %a, i16 signext %b) {
; MIPS32-LABEL: sdiv_i16:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: sll $1, $5, 16
-; MIPS32-NEXT: sra $2, $1, 16
-; MIPS32-NEXT: sll $1, $4, 16
; MIPS32-NEXT: sra $1, $1, 16
-; MIPS32-NEXT: div $zero, $2, $1
-; MIPS32-NEXT: teq $1, $zero, 7
+; MIPS32-NEXT: sll $2, $4, 16
+; MIPS32-NEXT: sra $2, $2, 16
+; MIPS32-NEXT: div $zero, $1, $2
+; MIPS32-NEXT: teq $2, $zero, 7
; MIPS32-NEXT: mflo $1
; MIPS32-NEXT: sll $1, $1, 16
; MIPS32-NEXT: sra $2, $1, 16
@@ -60,12 +60,12 @@ define signext i64 @sdiv_i64(i64 signext %a, i64 signext %b) {
; MIPS32-NEXT: .cfi_def_cfa_offset 32
; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
; MIPS32-NEXT: .cfi_offset 31, -4
-; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill
; MIPS32-NEXT: move $4, $6
-; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: move $5, $7
-; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: jal __divdi3
; MIPS32-NEXT: nop
; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
@@ -82,11 +82,11 @@ define signext i8 @srem_i8(i8 signext %a, i8 signext %b) {
; MIPS32-LABEL: srem_i8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: sll $1, $5, 24
-; MIPS32-NEXT: sra $2, $1, 24
-; MIPS32-NEXT: sll $1, $4, 24
; MIPS32-NEXT: sra $1, $1, 24
-; MIPS32-NEXT: div $zero, $2, $1
-; MIPS32-NEXT: teq $1, $zero, 7
+; MIPS32-NEXT: sll $2, $4, 24
+; MIPS32-NEXT: sra $2, $2, 24
+; MIPS32-NEXT: div $zero, $1, $2
+; MIPS32-NEXT: teq $2, $zero, 7
; MIPS32-NEXT: mflo $1
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: sra $2, $1, 24
@@ -101,11 +101,11 @@ define signext i16 @srem_i16(i16 signext %a, i16 signext %b) {
; MIPS32-LABEL: srem_i16:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: sll $1, $5, 16
-; MIPS32-NEXT: sra $2, $1, 16
-; MIPS32-NEXT: sll $1, $4, 16
; MIPS32-NEXT: sra $1, $1, 16
-; MIPS32-NEXT: div $zero, $2, $1
-; MIPS32-NEXT: teq $1, $zero, 7
+; MIPS32-NEXT: sll $2, $4, 16
+; MIPS32-NEXT: sra $2, $2, 16
+; MIPS32-NEXT: div $zero, $1, $2
+; MIPS32-NEXT: teq $2, $zero, 7
; MIPS32-NEXT: mfhi $1
; MIPS32-NEXT: sll $1, $1, 16
; MIPS32-NEXT: sra $2, $1, 16
@@ -136,12 +136,12 @@ define signext i64 @srem_i64(i64 signext %a, i64 signext %b) {
; MIPS32-NEXT: .cfi_def_cfa_offset 32
; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
; MIPS32-NEXT: .cfi_offset 31, -4
-; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill
; MIPS32-NEXT: move $4, $6
-; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: move $5, $7
-; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: jal __moddi3
; MIPS32-NEXT: nop
; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
@@ -157,10 +157,10 @@ entry:
define signext i8 @udiv_i8(i8 signext %a, i8 signext %b) {
; MIPS32-LABEL: udiv_i8:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: andi $2, $5, 255
-; MIPS32-NEXT: andi $1, $4, 255
-; MIPS32-NEXT: divu $zero, $2, $1
-; MIPS32-NEXT: teq $1, $zero, 7
+; MIPS32-NEXT: andi $1, $5, 255
+; MIPS32-NEXT: andi $2, $4, 255
+; MIPS32-NEXT: divu $zero, $1, $2
+; MIPS32-NEXT: teq $2, $zero, 7
; MIPS32-NEXT: mflo $1
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: sra $2, $1, 24
@@ -174,10 +174,10 @@ entry:
define signext i16 @udiv_i16(i16 signext %a, i16 signext %b) {
; MIPS32-LABEL: udiv_i16:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: andi $2, $5, 65535
-; MIPS32-NEXT: andi $1, $4, 65535
-; MIPS32-NEXT: divu $zero, $2, $1
-; MIPS32-NEXT: teq $1, $zero, 7
+; MIPS32-NEXT: andi $1, $5, 65535
+; MIPS32-NEXT: andi $2, $4, 65535
+; MIPS32-NEXT: divu $zero, $1, $2
+; MIPS32-NEXT: teq $2, $zero, 7
; MIPS32-NEXT: mflo $1
; MIPS32-NEXT: sll $1, $1, 16
; MIPS32-NEXT: sra $2, $1, 16
@@ -208,12 +208,12 @@ define signext i64 @udiv_i64(i64 signext %a, i64 signext %b) {
; MIPS32-NEXT: .cfi_def_cfa_offset 32
; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
; MIPS32-NEXT: .cfi_offset 31, -4
-; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill
; MIPS32-NEXT: move $4, $6
-; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: move $5, $7
-; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: jal __udivdi3
; MIPS32-NEXT: nop
; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
@@ -229,10 +229,10 @@ entry:
define signext i8 @urem_i8(i8 signext %a, i8 signext %b) {
; MIPS32-LABEL: urem_i8:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: andi $2, $5, 255
-; MIPS32-NEXT: andi $1, $4, 255
-; MIPS32-NEXT: divu $zero, $2, $1
-; MIPS32-NEXT: teq $1, $zero, 7
+; MIPS32-NEXT: andi $1, $5, 255
+; MIPS32-NEXT: andi $2, $4, 255
+; MIPS32-NEXT: divu $zero, $1, $2
+; MIPS32-NEXT: teq $2, $zero, 7
; MIPS32-NEXT: mfhi $1
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: sra $2, $1, 24
@@ -246,10 +246,10 @@ entry:
define signext i16 @urem_i16(i16 signext %a, i16 signext %b) {
; MIPS32-LABEL: urem_i16:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: andi $2, $5, 65535
-; MIPS32-NEXT: andi $1, $4, 65535
-; MIPS32-NEXT: divu $zero, $2, $1
-; MIPS32-NEXT: teq $1, $zero, 7
+; MIPS32-NEXT: andi $1, $5, 65535
+; MIPS32-NEXT: andi $2, $4, 65535
+; MIPS32-NEXT: divu $zero, $1, $2
+; MIPS32-NEXT: teq $2, $zero, 7
; MIPS32-NEXT: mfhi $1
; MIPS32-NEXT: sll $1, $1, 16
; MIPS32-NEXT: sra $2, $1, 16
@@ -280,12 +280,12 @@ define signext i64 @urem_i64(i64 signext %a, i64 signext %b) {
; MIPS32-NEXT: .cfi_def_cfa_offset 32
; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
; MIPS32-NEXT: .cfi_offset 31, -4
-; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill
; MIPS32-NEXT: move $4, $6
-; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: move $5, $7
-; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: jal __umoddi3
; MIPS32-NEXT: nop
; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll
index c292dba16ce3..7420a15cad3b 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll
@@ -4,9 +4,9 @@
define i8 @select_i8(i1 %test, i8 %a, i8 %b) {
; MIPS32-LABEL: select_i8:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $2, $6
; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: movn $2, $5, $1
+; MIPS32-NEXT: movn $6, $5, $1
+; MIPS32-NEXT: move $2, $6
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -17,9 +17,9 @@ entry:
define i16 @select_i16(i1 %test, i16 %a, i16 %b) {
; MIPS32-LABEL: select_i16:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $2, $6
; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: movn $2, $5, $1
+; MIPS32-NEXT: movn $6, $5, $1
+; MIPS32-NEXT: move $2, $6
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -30,9 +30,9 @@ entry:
define i32 @select_i32(i1 %test, i32 %a, i32 %b) {
; MIPS32-LABEL: select_i32:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $2, $6
; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: movn $2, $5, $1
+; MIPS32-NEXT: movn $6, $5, $1
+; MIPS32-NEXT: move $2, $6
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -43,9 +43,9 @@ entry:
define i32* @select_ptr(i1 %test, i32* %a, i32* %b) {
; MIPS32-LABEL: select_ptr:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $2, $6
; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: movn $2, $5, $1
+; MIPS32-NEXT: movn $6, $5, $1
+; MIPS32-NEXT: move $2, $6
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -56,12 +56,12 @@ entry:
define i32 @select_with_negation(i32 %a, i32 %b, i32 %x, i32 %y) {
; MIPS32-LABEL: select_with_negation:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $2, $7
-; MIPS32-NEXT: ori $3, $zero, 1
-; MIPS32-NEXT: slt $1, $4, $5
-; MIPS32-NEXT: xor $1, $1, $3
+; MIPS32-NEXT: ori $1, $zero, 1
+; MIPS32-NEXT: slt $2, $4, $5
+; MIPS32-NEXT: xor $1, $2, $1
; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: movn $2, $6, $1
+; MIPS32-NEXT: movn $7, $6, $1
+; MIPS32-NEXT: move $2, $7
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -74,13 +74,19 @@ entry:
define i64 @select_i64(i1 %test, i64 %a, i64 %b) {
; MIPS32-LABEL: select_i64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $1, $sp, 16
-; MIPS32-NEXT: lw $2, 0($1)
-; MIPS32-NEXT: addiu $1, $sp, 20
-; MIPS32-NEXT: lw $3, 0($1)
-; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: movn $2, $6, $1
-; MIPS32-NEXT: movn $3, $7, $1
+; MIPS32-NEXT: addiu $sp, $sp, -8
+; MIPS32-NEXT: .cfi_def_cfa_offset 8
+; MIPS32-NEXT: addiu $1, $sp, 24
+; MIPS32-NEXT: lw $1, 0($1)
+; MIPS32-NEXT: addiu $2, $sp, 28
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: andi $3, $4, 1
+; MIPS32-NEXT: movn $1, $6, $3
+; MIPS32-NEXT: movn $2, $7, $3
+; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: move $2, $1
+; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: addiu $sp, $sp, 8
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -91,11 +97,11 @@ entry:
define void @select_ambiguous_i64_in_fpr(i1 %test, i64* %i64_ptr_a, i64* %i64_ptr_b, i64* %i64_ptr_c) {
; MIPS32-LABEL: select_ambiguous_i64_in_fpr:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: ldc1 $f2, 0($5)
-; MIPS32-NEXT: ldc1 $f0, 0($6)
+; MIPS32-NEXT: ldc1 $f0, 0($5)
+; MIPS32-NEXT: ldc1 $f2, 0($6)
; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: movn.d $f0, $f2, $1
-; MIPS32-NEXT: sdc1 $f0, 0($7)
+; MIPS32-NEXT: movn.d $f2, $f0, $1
+; MIPS32-NEXT: sdc1 $f2, 0($7)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -110,9 +116,10 @@ define float @select_float(i1 %test, float %a, float %b) {
; MIPS32-LABEL: select_float:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: andi $1, $4, 1
-; MIPS32-NEXT: mtc1 $5, $f1
-; MIPS32-NEXT: mtc1 $6, $f0
-; MIPS32-NEXT: movn.s $f0, $f1, $1
+; MIPS32-NEXT: mtc1 $5, $f0
+; MIPS32-NEXT: mtc1 $6, $f1
+; MIPS32-NEXT: movn.s $f1, $f0, $1
+; MIPS32-NEXT: mov.s $f0, $f1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -123,11 +130,11 @@ entry:
define void @select_ambiguous_float_in_gpr(i1 %test, float* %f32_ptr_a, float* %f32_ptr_b, float* %f32_ptr_c) {
; MIPS32-LABEL: select_ambiguous_float_in_gpr:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: lw $2, 0($5)
-; MIPS32-NEXT: lw $1, 0($6)
+; MIPS32-NEXT: lw $1, 0($5)
+; MIPS32-NEXT: lw $2, 0($6)
; MIPS32-NEXT: andi $3, $4, 1
-; MIPS32-NEXT: movn $1, $2, $3
-; MIPS32-NEXT: sw $1, 0($7)
+; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: sw $2, 0($7)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -141,11 +148,11 @@ entry:
define double @select_double(double %a, double %b, i1 %test) {
; MIPS32-LABEL: select_double:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: mov.d $f0, $f14
; MIPS32-NEXT: addiu $1, $sp, 16
; MIPS32-NEXT: lw $1, 0($1)
; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: movn.d $f0, $f12, $1
+; MIPS32-NEXT: movn.d $f14, $f12, $1
+; MIPS32-NEXT: mov.d $f0, $f14
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll
index 07d094604684..0017f0c0ed08 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll
@@ -143,10 +143,10 @@ define float @u32tof32(i32 zeroext %a) {
; FP32-NEXT: lui $1, 17200
; FP32-NEXT: mtc1 $4, $f0
; FP32-NEXT: mtc1 $1, $f1
-; FP32-NEXT: lui $2, 17200
-; FP32-NEXT: ori $1, $zero, 0
-; FP32-NEXT: mtc1 $1, $f2
-; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: lui $1, 17200
+; FP32-NEXT: ori $2, $zero, 0
+; FP32-NEXT: mtc1 $2, $f2
+; FP32-NEXT: mtc1 $1, $f3
; FP32-NEXT: sub.d $f0, $f0, $f2
; FP32-NEXT: cvt.s.d $f0, $f0
; FP32-NEXT: jr $ra
@@ -157,10 +157,10 @@ define float @u32tof32(i32 zeroext %a) {
; FP64-NEXT: lui $1, 17200
; FP64-NEXT: mtc1 $4, $f0
; FP64-NEXT: mthc1 $1, $f0
-; FP64-NEXT: lui $2, 17200
-; FP64-NEXT: ori $1, $zero, 0
-; FP64-NEXT: mtc1 $1, $f1
-; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: lui $1, 17200
+; FP64-NEXT: ori $2, $zero, 0
+; FP64-NEXT: mtc1 $2, $f1
+; FP64-NEXT: mthc1 $1, $f1
; FP64-NEXT: sub.d $f0, $f0, $f1
; FP64-NEXT: cvt.s.d $f0, $f0
; FP64-NEXT: jr $ra
@@ -177,10 +177,10 @@ define float @u16tof32(i16 zeroext %a) {
; FP32-NEXT: lui $2, 17200
; FP32-NEXT: mtc1 $1, $f0
; FP32-NEXT: mtc1 $2, $f1
-; FP32-NEXT: lui $2, 17200
-; FP32-NEXT: ori $1, $zero, 0
-; FP32-NEXT: mtc1 $1, $f2
-; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: lui $1, 17200
+; FP32-NEXT: ori $2, $zero, 0
+; FP32-NEXT: mtc1 $2, $f2
+; FP32-NEXT: mtc1 $1, $f3
; FP32-NEXT: sub.d $f0, $f0, $f2
; FP32-NEXT: cvt.s.d $f0, $f0
; FP32-NEXT: jr $ra
@@ -192,10 +192,10 @@ define float @u16tof32(i16 zeroext %a) {
; FP64-NEXT: lui $2, 17200
; FP64-NEXT: mtc1 $1, $f0
; FP64-NEXT: mthc1 $2, $f0
-; FP64-NEXT: lui $2, 17200
-; FP64-NEXT: ori $1, $zero, 0
-; FP64-NEXT: mtc1 $1, $f1
-; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: lui $1, 17200
+; FP64-NEXT: ori $2, $zero, 0
+; FP64-NEXT: mtc1 $2, $f1
+; FP64-NEXT: mthc1 $1, $f1
; FP64-NEXT: sub.d $f0, $f0, $f1
; FP64-NEXT: cvt.s.d $f0, $f0
; FP64-NEXT: jr $ra
@@ -212,10 +212,10 @@ define float @u8tof32(i8 zeroext %a) {
; FP32-NEXT: lui $2, 17200
; FP32-NEXT: mtc1 $1, $f0
; FP32-NEXT: mtc1 $2, $f1
-; FP32-NEXT: lui $2, 17200
-; FP32-NEXT: ori $1, $zero, 0
-; FP32-NEXT: mtc1 $1, $f2
-; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: lui $1, 17200
+; FP32-NEXT: ori $2, $zero, 0
+; FP32-NEXT: mtc1 $2, $f2
+; FP32-NEXT: mtc1 $1, $f3
; FP32-NEXT: sub.d $f0, $f0, $f2
; FP32-NEXT: cvt.s.d $f0, $f0
; FP32-NEXT: jr $ra
@@ -227,10 +227,10 @@ define float @u8tof32(i8 zeroext %a) {
; FP64-NEXT: lui $2, 17200
; FP64-NEXT: mtc1 $1, $f0
; FP64-NEXT: mthc1 $2, $f0
-; FP64-NEXT: lui $2, 17200
-; FP64-NEXT: ori $1, $zero, 0
-; FP64-NEXT: mtc1 $1, $f1
-; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: lui $1, 17200
+; FP64-NEXT: ori $2, $zero, 0
+; FP64-NEXT: mtc1 $2, $f1
+; FP64-NEXT: mthc1 $1, $f1
; FP64-NEXT: sub.d $f0, $f0, $f1
; FP64-NEXT: cvt.s.d $f0, $f0
; FP64-NEXT: jr $ra
@@ -264,10 +264,10 @@ define double @u32tof64(i32 zeroext %a) {
; FP32-NEXT: lui $1, 17200
; FP32-NEXT: mtc1 $4, $f0
; FP32-NEXT: mtc1 $1, $f1
-; FP32-NEXT: lui $2, 17200
-; FP32-NEXT: ori $1, $zero, 0
-; FP32-NEXT: mtc1 $1, $f2
-; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: lui $1, 17200
+; FP32-NEXT: ori $2, $zero, 0
+; FP32-NEXT: mtc1 $2, $f2
+; FP32-NEXT: mtc1 $1, $f3
; FP32-NEXT: sub.d $f0, $f0, $f2
; FP32-NEXT: jr $ra
; FP32-NEXT: nop
@@ -277,10 +277,10 @@ define double @u32tof64(i32 zeroext %a) {
; FP64-NEXT: lui $1, 17200
; FP64-NEXT: mtc1 $4, $f0
; FP64-NEXT: mthc1 $1, $f0
-; FP64-NEXT: lui $2, 17200
-; FP64-NEXT: ori $1, $zero, 0
-; FP64-NEXT: mtc1 $1, $f1
-; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: lui $1, 17200
+; FP64-NEXT: ori $2, $zero, 0
+; FP64-NEXT: mtc1 $2, $f1
+; FP64-NEXT: mthc1 $1, $f1
; FP64-NEXT: sub.d $f0, $f0, $f1
; FP64-NEXT: jr $ra
; FP64-NEXT: nop
@@ -296,10 +296,10 @@ define double @u16tof64(i16 zeroext %a) {
; FP32-NEXT: lui $2, 17200
; FP32-NEXT: mtc1 $1, $f0
; FP32-NEXT: mtc1 $2, $f1
-; FP32-NEXT: lui $2, 17200
-; FP32-NEXT: ori $1, $zero, 0
-; FP32-NEXT: mtc1 $1, $f2
-; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: lui $1, 17200
+; FP32-NEXT: ori $2, $zero, 0
+; FP32-NEXT: mtc1 $2, $f2
+; FP32-NEXT: mtc1 $1, $f3
; FP32-NEXT: sub.d $f0, $f0, $f2
; FP32-NEXT: jr $ra
; FP32-NEXT: nop
@@ -310,10 +310,10 @@ define double @u16tof64(i16 zeroext %a) {
; FP64-NEXT: lui $2, 17200
; FP64-NEXT: mtc1 $1, $f0
; FP64-NEXT: mthc1 $2, $f0
-; FP64-NEXT: lui $2, 17200
-; FP64-NEXT: ori $1, $zero, 0
-; FP64-NEXT: mtc1 $1, $f1
-; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: lui $1, 17200
+; FP64-NEXT: ori $2, $zero, 0
+; FP64-NEXT: mtc1 $2, $f1
+; FP64-NEXT: mthc1 $1, $f1
; FP64-NEXT: sub.d $f0, $f0, $f1
; FP64-NEXT: jr $ra
; FP64-NEXT: nop
@@ -329,10 +329,10 @@ define double @u8tof64(i8 zeroext %a) {
; FP32-NEXT: lui $2, 17200
; FP32-NEXT: mtc1 $1, $f0
; FP32-NEXT: mtc1 $2, $f1
-; FP32-NEXT: lui $2, 17200
-; FP32-NEXT: ori $1, $zero, 0
-; FP32-NEXT: mtc1 $1, $f2
-; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: lui $1, 17200
+; FP32-NEXT: ori $2, $zero, 0
+; FP32-NEXT: mtc1 $2, $f2
+; FP32-NEXT: mtc1 $1, $f3
; FP32-NEXT: sub.d $f0, $f0, $f2
; FP32-NEXT: jr $ra
; FP32-NEXT: nop
@@ -343,10 +343,10 @@ define double @u8tof64(i8 zeroext %a) {
; FP64-NEXT: lui $2, 17200
; FP64-NEXT: mtc1 $1, $f0
; FP64-NEXT: mthc1 $2, $f0
-; FP64-NEXT: lui $2, 17200
-; FP64-NEXT: ori $1, $zero, 0
-; FP64-NEXT: mtc1 $1, $f1
-; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: lui $1, 17200
+; FP64-NEXT: ori $2, $zero, 0
+; FP64-NEXT: mtc1 $2, $f1
+; FP64-NEXT: mthc1 $1, $f1
; FP64-NEXT: sub.d $f0, $f0, $f1
; FP64-NEXT: jr $ra
; FP64-NEXT: nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll
index 256655a05469..37c40392e7a0 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll
@@ -15,10 +15,10 @@ define void @store_float_align1(float %a) {
; MIPS32-LABEL: store_float_align1:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $1, %hi(float_align1)
-; MIPS32-NEXT: addiu $2, $1, %lo(float_align1)
-; MIPS32-NEXT: mfc1 $1, $f12
-; MIPS32-NEXT: swl $1, 3($2)
-; MIPS32-NEXT: swr $1, 0($2)
+; MIPS32-NEXT: addiu $1, $1, %lo(float_align1)
+; MIPS32-NEXT: mfc1 $2, $f12
+; MIPS32-NEXT: swl $2, 3($1)
+; MIPS32-NEXT: swr $2, 0($1)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
@@ -37,10 +37,10 @@ define void @store_float_align2(float %a) {
; MIPS32-LABEL: store_float_align2:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $1, %hi(float_align2)
-; MIPS32-NEXT: addiu $2, $1, %lo(float_align2)
-; MIPS32-NEXT: mfc1 $1, $f12
-; MIPS32-NEXT: swl $1, 3($2)
-; MIPS32-NEXT: swr $1, 0($2)
+; MIPS32-NEXT: addiu $1, $1, %lo(float_align2)
+; MIPS32-NEXT: mfc1 $2, $f12
+; MIPS32-NEXT: swl $2, 3($1)
+; MIPS32-NEXT: swr $2, 0($1)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll
index 333b24a93684..7d068633a505 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll
@@ -204,12 +204,12 @@ define void @store6align1(%struct.MemSize6_Align1* %S, i64 %a) {
; MIPS32-LABEL: store6align1:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: ori $1, $zero, 4
-; MIPS32-NEXT: addu $2, $4, $1
+; MIPS32-NEXT: addu $1, $4, $1
; MIPS32-NEXT: swl $6, 3($4)
; MIPS32-NEXT: swr $6, 0($4)
; MIPS32-NEXT: sb $7, 4($4)
-; MIPS32-NEXT: srl $1, $7, 8
-; MIPS32-NEXT: sb $1, 1($2)
+; MIPS32-NEXT: srl $2, $7, 8
+; MIPS32-NEXT: sb $2, 1($1)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
@@ -290,25 +290,25 @@ define void @store7align1(%struct.MemSize7_Align1* %S, i64 %a) {
; MIPS32-LABEL: store7align1:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: ori $1, $zero, 4
-; MIPS32-NEXT: addu $2, $4, $1
+; MIPS32-NEXT: addu $1, $4, $1
; MIPS32-NEXT: swl $6, 3($4)
; MIPS32-NEXT: swr $6, 0($4)
; MIPS32-NEXT: sb $7, 4($4)
-; MIPS32-NEXT: srl $1, $7, 8
-; MIPS32-NEXT: sb $1, 1($2)
-; MIPS32-NEXT: srl $1, $7, 16
-; MIPS32-NEXT: sb $1, 2($2)
+; MIPS32-NEXT: srl $2, $7, 8
+; MIPS32-NEXT: sb $2, 1($1)
+; MIPS32-NEXT: srl $2, $7, 16
+; MIPS32-NEXT: sb $2, 2($1)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: store7align1:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: ori $1, $zero, 4
-; MIPS32R6-NEXT: addu $2, $4, $1
+; MIPS32R6-NEXT: addu $1, $4, $1
; MIPS32R6-NEXT: sw $6, 0($4)
; MIPS32R6-NEXT: sh $7, 4($4)
-; MIPS32R6-NEXT: srl $1, $7, 16
-; MIPS32R6-NEXT: sb $1, 2($2)
+; MIPS32R6-NEXT: srl $2, $7, 16
+; MIPS32R6-NEXT: sb $2, 2($1)
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize7_Align1* %S to i56*
@@ -321,23 +321,23 @@ define void @store7align2(%struct.MemSize7_Align2* %S, i64 %a) {
; MIPS32-LABEL: store7align2:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: ori $1, $zero, 4
-; MIPS32-NEXT: addu $2, $4, $1
+; MIPS32-NEXT: addu $1, $4, $1
; MIPS32-NEXT: swl $6, 3($4)
; MIPS32-NEXT: swr $6, 0($4)
; MIPS32-NEXT: sh $7, 4($4)
-; MIPS32-NEXT: srl $1, $7, 16
-; MIPS32-NEXT: sb $1, 2($2)
+; MIPS32-NEXT: srl $2, $7, 16
+; MIPS32-NEXT: sb $2, 2($1)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: store7align2:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: ori $1, $zero, 4
-; MIPS32R6-NEXT: addu $2, $4, $1
+; MIPS32R6-NEXT: addu $1, $4, $1
; MIPS32R6-NEXT: sw $6, 0($4)
; MIPS32R6-NEXT: sh $7, 4($4)
-; MIPS32R6-NEXT: srl $1, $7, 16
-; MIPS32R6-NEXT: sb $1, 2($2)
+; MIPS32R6-NEXT: srl $2, $7, 16
+; MIPS32R6-NEXT: sb $2, 2($1)
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize7_Align2* %S to i56*
@@ -350,22 +350,22 @@ define void @store7align4(%struct.MemSize7_Align4* %S, i64 %a) {
; MIPS32-LABEL: store7align4:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: ori $1, $zero, 4
-; MIPS32-NEXT: addu $2, $4, $1
+; MIPS32-NEXT: addu $1, $4, $1
; MIPS32-NEXT: sw $6, 0($4)
; MIPS32-NEXT: sh $7, 4($4)
-; MIPS32-NEXT: srl $1, $7, 16
-; MIPS32-NEXT: sb $1, 2($2)
+; MIPS32-NEXT: srl $2, $7, 16
+; MIPS32-NEXT: sb $2, 2($1)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: store7align4:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: ori $1, $zero, 4
-; MIPS32R6-NEXT: addu $2, $4, $1
+; MIPS32R6-NEXT: addu $1, $4, $1
; MIPS32R6-NEXT: sw $6, 0($4)
; MIPS32R6-NEXT: sh $7, 4($4)
-; MIPS32R6-NEXT: srl $1, $7, 16
-; MIPS32R6-NEXT: sb $1, 2($2)
+; MIPS32R6-NEXT: srl $2, $7, 16
+; MIPS32R6-NEXT: sb $2, 2($1)
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize7_Align4* %S to i56*
@@ -378,22 +378,22 @@ define void @store7align8(%struct.MemSize7_Align8* %S, i64 %a) {
; MIPS32-LABEL: store7align8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: ori $1, $zero, 4
-; MIPS32-NEXT: addu $2, $4, $1
+; MIPS32-NEXT: addu $1, $4, $1
; MIPS32-NEXT: sw $6, 0($4)
; MIPS32-NEXT: sh $7, 4($4)
-; MIPS32-NEXT: srl $1, $7, 16
-; MIPS32-NEXT: sb $1, 2($2)
+; MIPS32-NEXT: srl $2, $7, 16
+; MIPS32-NEXT: sb $2, 2($1)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32R6-LABEL: store7align8:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: ori $1, $zero, 4
-; MIPS32R6-NEXT: addu $2, $4, $1
+; MIPS32R6-NEXT: addu $1, $4, $1
; MIPS32R6-NEXT: sw $6, 0($4)
; MIPS32R6-NEXT: sh $7, 4($4)
-; MIPS32R6-NEXT: srl $1, $7, 16
-; MIPS32R6-NEXT: sb $1, 2($2)
+; MIPS32R6-NEXT: srl $2, $7, 16
+; MIPS32R6-NEXT: sb $2, 2($1)
; MIPS32R6-NEXT: jrc $ra
entry:
%0 = bitcast %struct.MemSize7_Align8* %S to i56*
@@ -406,13 +406,13 @@ define void @store_double_align1(double %a) {
; MIPS32-LABEL: store_double_align1:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $1, %hi(double_align1)
-; MIPS32-NEXT: addiu $2, $1, %lo(double_align1)
-; MIPS32-NEXT: mfc1 $3, $f12
-; MIPS32-NEXT: mfc1 $1, $f13
-; MIPS32-NEXT: swl $3, 3($2)
-; MIPS32-NEXT: swr $3, 0($2)
-; MIPS32-NEXT: swl $1, 7($2)
-; MIPS32-NEXT: swr $1, 4($2)
+; MIPS32-NEXT: addiu $1, $1, %lo(double_align1)
+; MIPS32-NEXT: mfc1 $2, $f12
+; MIPS32-NEXT: mfc1 $3, $f13
+; MIPS32-NEXT: swl $2, 3($1)
+; MIPS32-NEXT: swr $2, 0($1)
+; MIPS32-NEXT: swl $3, 7($1)
+; MIPS32-NEXT: swr $3, 4($1)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
@@ -431,13 +431,13 @@ define void @store_double_align2(double %a) {
; MIPS32-LABEL: store_double_align2:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $1, %hi(double_align2)
-; MIPS32-NEXT: addiu $2, $1, %lo(double_align2)
-; MIPS32-NEXT: mfc1 $3, $f12
-; MIPS32-NEXT: mfc1 $1, $f13
-; MIPS32-NEXT: swl $3, 3($2)
-; MIPS32-NEXT: swr $3, 0($2)
-; MIPS32-NEXT: swl $1, 7($2)
-; MIPS32-NEXT: swr $1, 4($2)
+; MIPS32-NEXT: addiu $1, $1, %lo(double_align2)
+; MIPS32-NEXT: mfc1 $2, $f12
+; MIPS32-NEXT: mfc1 $3, $f13
+; MIPS32-NEXT: swl $2, 3($1)
+; MIPS32-NEXT: swr $2, 0($1)
+; MIPS32-NEXT: swl $3, 7($1)
+; MIPS32-NEXT: swr $3, 4($1)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
@@ -456,11 +456,11 @@ define void @store_double_align4(double %a) {
; MIPS32-LABEL: store_double_align4:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $1, %hi(double_align4)
-; MIPS32-NEXT: addiu $2, $1, %lo(double_align4)
-; MIPS32-NEXT: mfc1 $3, $f12
-; MIPS32-NEXT: mfc1 $1, $f13
-; MIPS32-NEXT: sw $3, 0($2)
-; MIPS32-NEXT: sw $1, 4($2)
+; MIPS32-NEXT: addiu $1, $1, %lo(double_align4)
+; MIPS32-NEXT: mfc1 $2, $f12
+; MIPS32-NEXT: mfc1 $3, $f13
+; MIPS32-NEXT: sw $2, 0($1)
+; MIPS32-NEXT: sw $3, 4($1)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll
index ac98a1be898d..66dc761c5fa3 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll
@@ -88,10 +88,10 @@ define i64 @sub_i64(i64 %a, i64 %b) {
; MIPS32-LABEL: sub_i64:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: subu $2, $6, $4
-; MIPS32-NEXT: sltu $3, $6, $4
-; MIPS32-NEXT: subu $1, $7, $5
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: subu $3, $1, $3
+; MIPS32-NEXT: sltu $1, $6, $4
+; MIPS32-NEXT: subu $3, $7, $5
+; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: subu $3, $3, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -102,37 +102,38 @@ entry:
define i128 @sub_i128(i128 %a, i128 %b) {
; MIPS32-LABEL: sub_i128:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $10, $5
-; MIPS32-NEXT: move $9, $6
; MIPS32-NEXT: addiu $1, $sp, 16
-; MIPS32-NEXT: lw $3, 0($1)
-; MIPS32-NEXT: addiu $1, $sp, 20
-; MIPS32-NEXT: lw $6, 0($1)
-; MIPS32-NEXT: addiu $1, $sp, 24
-; MIPS32-NEXT: lw $5, 0($1)
-; MIPS32-NEXT: addiu $1, $sp, 28
; MIPS32-NEXT: lw $1, 0($1)
-; MIPS32-NEXT: subu $2, $3, $4
-; MIPS32-NEXT: sltu $4, $3, $4
-; MIPS32-NEXT: subu $3, $6, $10
-; MIPS32-NEXT: andi $8, $4, 1
-; MIPS32-NEXT: subu $3, $3, $8
-; MIPS32-NEXT: xor $8, $6, $10
-; MIPS32-NEXT: sltiu $8, $8, 1
-; MIPS32-NEXT: sltu $6, $6, $10
-; MIPS32-NEXT: andi $8, $8, 1
-; MIPS32-NEXT: movn $6, $4, $8
-; MIPS32-NEXT: subu $4, $5, $9
-; MIPS32-NEXT: andi $8, $6, 1
-; MIPS32-NEXT: subu $4, $4, $8
-; MIPS32-NEXT: xor $8, $5, $9
-; MIPS32-NEXT: sltiu $8, $8, 1
-; MIPS32-NEXT: sltu $5, $5, $9
-; MIPS32-NEXT: andi $8, $8, 1
-; MIPS32-NEXT: movn $5, $6, $8
-; MIPS32-NEXT: subu $1, $1, $7
+; MIPS32-NEXT: addiu $2, $sp, 20
+; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $3, $sp, 24
+; MIPS32-NEXT: lw $3, 0($3)
+; MIPS32-NEXT: addiu $8, $sp, 28
+; MIPS32-NEXT: lw $8, 0($8)
+; MIPS32-NEXT: subu $9, $1, $4
+; MIPS32-NEXT: sltu $1, $1, $4
+; MIPS32-NEXT: subu $4, $2, $5
+; MIPS32-NEXT: andi $10, $1, 1
+; MIPS32-NEXT: subu $4, $4, $10
+; MIPS32-NEXT: xor $10, $2, $5
+; MIPS32-NEXT: sltiu $10, $10, 1
+; MIPS32-NEXT: sltu $2, $2, $5
+; MIPS32-NEXT: andi $5, $10, 1
+; MIPS32-NEXT: movn $2, $1, $5
+; MIPS32-NEXT: subu $1, $3, $6
+; MIPS32-NEXT: andi $5, $2, 1
+; MIPS32-NEXT: subu $1, $1, $5
+; MIPS32-NEXT: xor $5, $3, $6
+; MIPS32-NEXT: sltiu $5, $5, 1
+; MIPS32-NEXT: sltu $3, $3, $6
; MIPS32-NEXT: andi $5, $5, 1
-; MIPS32-NEXT: subu $5, $1, $5
+; MIPS32-NEXT: movn $3, $2, $5
+; MIPS32-NEXT: subu $2, $8, $7
+; MIPS32-NEXT: andi $3, $3, 1
+; MIPS32-NEXT: subu $5, $2, $3
+; MIPS32-NEXT: move $2, $9
+; MIPS32-NEXT: move $3, $4
+; MIPS32-NEXT: move $4, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll
index 8ce695f07362..6ad041d3a688 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll
@@ -4,9 +4,9 @@
define void @sub_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %c) {
; P5600-LABEL: sub_v16i8:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.b $w1, 0($4)
-; P5600-NEXT: ld.b $w0, 0($5)
-; P5600-NEXT: subv.b $w0, $w0, $w1
+; P5600-NEXT: ld.b $w0, 0($4)
+; P5600-NEXT: ld.b $w1, 0($5)
+; P5600-NEXT: subv.b $w0, $w1, $w0
; P5600-NEXT: st.b $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
@@ -21,9 +21,9 @@ entry:
define void @sub_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c) {
; P5600-LABEL: sub_v8i16:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.h $w1, 0($4)
-; P5600-NEXT: ld.h $w0, 0($5)
-; P5600-NEXT: subv.h $w0, $w0, $w1
+; P5600-NEXT: ld.h $w0, 0($4)
+; P5600-NEXT: ld.h $w1, 0($5)
+; P5600-NEXT: subv.h $w0, $w1, $w0
; P5600-NEXT: st.h $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
@@ -38,9 +38,9 @@ entry:
define void @sub_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c) {
; P5600-LABEL: sub_v4i32:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.w $w1, 0($4)
-; P5600-NEXT: ld.w $w0, 0($5)
-; P5600-NEXT: subv.w $w0, $w0, $w1
+; P5600-NEXT: ld.w $w0, 0($4)
+; P5600-NEXT: ld.w $w1, 0($5)
+; P5600-NEXT: subv.w $w0, $w1, $w0
; P5600-NEXT: st.w $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
@@ -55,9 +55,9 @@ entry:
define void @sub_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i64>* %c) {
; P5600-LABEL: sub_v2i64:
; P5600: # %bb.0: # %entry
-; P5600-NEXT: ld.d $w1, 0($4)
-; P5600-NEXT: ld.d $w0, 0($5)
-; P5600-NEXT: subv.d $w0, $w0, $w1
+; P5600-NEXT: ld.d $w0, 0($4)
+; P5600-NEXT: ld.d $w1, 0($5)
+; P5600-NEXT: subv.d $w0, $w1, $w0
; P5600-NEXT: st.d $w0, 0($6)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll
index d81e3edf8dd3..25e87da5ae42 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll
@@ -26,9 +26,9 @@ entry:
define i32 @outgoing_gpr_instr(i32* %i32_ptr1, i32* %i32_ptr2) {
; MIPS32-LABEL: outgoing_gpr_instr:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: lw $2, 0($4)
-; MIPS32-NEXT: lw $1, 0($5)
-; MIPS32-NEXT: addu $2, $1, $2
+; MIPS32-NEXT: lw $1, 0($4)
+; MIPS32-NEXT: lw $2, 0($5)
+; MIPS32-NEXT: addu $2, $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -56,10 +56,10 @@ entry:
define i32 @incoming_gpr(i32 %incoming_phys_reg, i1 %test, i32* %a) {
; MIPS32-LABEL: incoming_gpr:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $2, $4
; MIPS32-NEXT: lw $1, 0($6)
-; MIPS32-NEXT: andi $3, $5, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: andi $2, $5, 1
+; MIPS32-NEXT: movn $4, $1, $2
+; MIPS32-NEXT: move $2, $4
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -71,10 +71,10 @@ entry:
define float @incoming_fpr(float %incoming_phys_reg, i1 %test, float* %a) {
; MIPS32-LABEL: incoming_fpr:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: mov.s $f0, $f12
-; MIPS32-NEXT: lwc1 $f1, 0($6)
+; MIPS32-NEXT: lwc1 $f0, 0($6)
; MIPS32-NEXT: andi $1, $5, 1
-; MIPS32-NEXT: movn.s $f0, $f1, $1
+; MIPS32-NEXT: movn.s $f12, $f0, $1
+; MIPS32-NEXT: mov.s $f0, $f12
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -103,10 +103,11 @@ entry:
define float @incoming_float_instr(float %val1, float %val2, float* %float_ptr, i1 %test) {
; MIPS32-LABEL: incoming_float_instr:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: lwc1 $f1, 0($6)
-; MIPS32-NEXT: add.s $f0, $f14, $f12
+; MIPS32-NEXT: lwc1 $f0, 0($6)
+; MIPS32-NEXT: add.s $f1, $f14, $f12
; MIPS32-NEXT: andi $1, $7, 1
-; MIPS32-NEXT: movn.s $f0, $f1, $1
+; MIPS32-NEXT: movn.s $f1, $f0, $1
+; MIPS32-NEXT: mov.s $f0, $f1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll
index fa6bf93d45d7..91a82f2a7fcc 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll
@@ -13,7 +13,6 @@ define void @testVaCopyArg(i8* %fmt, ...) {
; MIPS32-NEXT: .cfi_def_cfa_offset 40
; MIPS32-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill
; MIPS32-NEXT: .cfi_offset 31, -4
-; MIPS32-NEXT: move $3, $4
; MIPS32-NEXT: addiu $1, $sp, 44
; MIPS32-NEXT: sw $5, 0($1)
; MIPS32-NEXT: addiu $1, $sp, 48
@@ -21,23 +20,24 @@ define void @testVaCopyArg(i8* %fmt, ...) {
; MIPS32-NEXT: addiu $1, $sp, 52
; MIPS32-NEXT: sw $7, 0($1)
; MIPS32-NEXT: lui $1, %hi($.str)
-; MIPS32-NEXT: addiu $4, $1, %lo($.str)
-; MIPS32-NEXT: addiu $6, $sp, 32
-; MIPS32-NEXT: addiu $2, $sp, 28
+; MIPS32-NEXT: addiu $1, $1, %lo($.str)
+; MIPS32-NEXT: addiu $2, $sp, 32
+; MIPS32-NEXT: addiu $3, $sp, 28
; MIPS32-NEXT: addiu $5, $sp, 24
-; MIPS32-NEXT: addiu $1, $sp, 20
-; MIPS32-NEXT: sw $3, 0($6)
-; MIPS32-NEXT: addiu $3, $sp, 44
-; MIPS32-NEXT: sw $3, 0($2)
-; MIPS32-NEXT: lw $2, 0($2)
+; MIPS32-NEXT: addiu $6, $sp, 20
+; MIPS32-NEXT: sw $4, 0($2)
+; MIPS32-NEXT: addiu $2, $sp, 44
+; MIPS32-NEXT: sw $2, 0($3)
+; MIPS32-NEXT: lw $2, 0($3)
; MIPS32-NEXT: sw $2, 0($5)
; MIPS32-NEXT: lw $2, 0($5)
; MIPS32-NEXT: ori $3, $zero, 4
; MIPS32-NEXT: addu $3, $2, $3
; MIPS32-NEXT: sw $3, 0($5)
; MIPS32-NEXT: lw $2, 0($2)
-; MIPS32-NEXT: sw $2, 0($1)
-; MIPS32-NEXT: lw $5, 0($1)
+; MIPS32-NEXT: sw $2, 0($6)
+; MIPS32-NEXT: lw $5, 0($6)
+; MIPS32-NEXT: move $4, $1
; MIPS32-NEXT: jal printf
; MIPS32-NEXT: nop
; MIPS32-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll
index e42e0fe3a6b2..ec4252c17dee 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll
@@ -113,8 +113,9 @@ entry:
define i64 @load4_s32_to_sextLoad4_s64(i32* %px) {
; MIPS32-LABEL: load4_s32_to_sextLoad4_s64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: lw $2, 0($4)
-; MIPS32-NEXT: sra $3, $2, 31
+; MIPS32-NEXT: lw $1, 0($4)
+; MIPS32-NEXT: sra $3, $1, 31
+; MIPS32-NEXT: move $2, $1
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll
index 1e2954542a06..7f54f810b8dd 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll
@@ -4,8 +4,8 @@
define i64 @zext(i32 %x) {
; MIPS32-LABEL: zext:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $2, $4
; MIPS32-NEXT: ori $3, $zero, 0
+; MIPS32-NEXT: move $2, $4
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
@@ -16,8 +16,8 @@ entry:
define i64 @sext(i32 %x) {
; MIPS32-LABEL: sext:
; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sra $3, $4, 31
; MIPS32-NEXT: move $2, $4
-; MIPS32-NEXT: sra $3, $2, 31
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
entry:
diff --git a/llvm/test/CodeGen/Mips/atomic-min-max.ll b/llvm/test/CodeGen/Mips/atomic-min-max.ll
index 8fa95e6d5e4d..646af650c00e 100644
--- a/llvm/test/CodeGen/Mips/atomic-min-max.ll
+++ b/llvm/test/CodeGen/Mips/atomic-min-max.ll
@@ -829,38 +829,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: addiu $sp, $sp, -8
; MIPS-NEXT: .cfi_def_cfa_offset 8
-; MIPS-NEXT: # kill: def $at killed $a1
+; MIPS-NEXT: move $1, $5
; MIPS-NEXT: sync
-; MIPS-NEXT: addiu $1, $zero, -4
-; MIPS-NEXT: and $6, $4, $1
-; MIPS-NEXT: andi $1, $4, 3
-; MIPS-NEXT: xori $1, $1, 2
-; MIPS-NEXT: sll $10, $1, 3
-; MIPS-NEXT: ori $1, $zero, 65535
-; MIPS-NEXT: sllv $8, $1, $10
-; MIPS-NEXT: nor $9, $zero, $8
-; MIPS-NEXT: sllv $7, $5, $10
+; MIPS-NEXT: addiu $2, $zero, -4
+; MIPS-NEXT: and $2, $4, $2
+; MIPS-NEXT: andi $3, $4, 3
+; MIPS-NEXT: xori $3, $3, 2
+; MIPS-NEXT: sll $3, $3, 3
+; MIPS-NEXT: ori $4, $zero, 65535
+; MIPS-NEXT: sllv $4, $4, $3
+; MIPS-NEXT: nor $6, $zero, $4
+; MIPS-NEXT: sllv $5, $5, $3
; MIPS-NEXT: $BB4_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS-NEXT: ll $2, 0($6)
-; MIPS-NEXT: slt $5, $2, $7
-; MIPS-NEXT: move $3, $2
-; MIPS-NEXT: movn $3, $7, $5
-; MIPS-NEXT: and $3, $3, $8
-; MIPS-NEXT: and $4, $2, $9
-; MIPS-NEXT: or $4, $4, $3
-; MIPS-NEXT: sc $4, 0($6)
-; MIPS-NEXT: beqz $4, $BB4_1
+; MIPS-NEXT: ll $8, 0($2)
+; MIPS-NEXT: slt $11, $8, $5
+; MIPS-NEXT: move $9, $8
+; MIPS-NEXT: movn $9, $5, $11
+; MIPS-NEXT: and $9, $9, $4
+; MIPS-NEXT: and $10, $8, $6
+; MIPS-NEXT: or $10, $10, $9
+; MIPS-NEXT: sc $10, 0($2)
+; MIPS-NEXT: beqz $10, $BB4_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
-; MIPS-NEXT: and $1, $2, $8
-; MIPS-NEXT: srlv $1, $1, $10
-; MIPS-NEXT: seh $1, $1
+; MIPS-NEXT: and $7, $8, $4
+; MIPS-NEXT: srlv $7, $7, $3
+; MIPS-NEXT: seh $7, $7
; MIPS-NEXT: # %bb.3: # %entry
-; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS-NEXT: # %bb.4: # %entry
-; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: sync
+; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: addiu $sp, $sp, 8
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
@@ -869,38 +869,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: addiu $sp, $sp, -8
; MIPSR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSR6-NEXT: # kill: def $at killed $a1
+; MIPSR6-NEXT: move $1, $5
; MIPSR6-NEXT: sync
-; MIPSR6-NEXT: addiu $1, $zero, -4
-; MIPSR6-NEXT: and $6, $4, $1
-; MIPSR6-NEXT: andi $1, $4, 3
-; MIPSR6-NEXT: xori $1, $1, 2
-; MIPSR6-NEXT: sll $10, $1, 3
-; MIPSR6-NEXT: ori $1, $zero, 65535
-; MIPSR6-NEXT: sllv $8, $1, $10
-; MIPSR6-NEXT: nor $9, $zero, $8
-; MIPSR6-NEXT: sllv $7, $5, $10
+; MIPSR6-NEXT: addiu $2, $zero, -4
+; MIPSR6-NEXT: and $2, $4, $2
+; MIPSR6-NEXT: andi $3, $4, 3
+; MIPSR6-NEXT: xori $3, $3, 2
+; MIPSR6-NEXT: sll $3, $3, 3
+; MIPSR6-NEXT: ori $4, $zero, 65535
+; MIPSR6-NEXT: sllv $4, $4, $3
+; MIPSR6-NEXT: nor $6, $zero, $4
+; MIPSR6-NEXT: sllv $5, $5, $3
; MIPSR6-NEXT: $BB4_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSR6-NEXT: ll $2, 0($6)
-; MIPSR6-NEXT: slt $5, $2, $7
-; MIPSR6-NEXT: seleqz $3, $2, $5
-; MIPSR6-NEXT: selnez $5, $7, $5
-; MIPSR6-NEXT: or $3, $3, $5
-; MIPSR6-NEXT: and $3, $3, $8
-; MIPSR6-NEXT: and $4, $2, $9
-; MIPSR6-NEXT: or $4, $4, $3
-; MIPSR6-NEXT: sc $4, 0($6)
-; MIPSR6-NEXT: beqzc $4, $BB4_1
+; MIPSR6-NEXT: ll $8, 0($2)
+; MIPSR6-NEXT: slt $11, $8, $5
+; MIPSR6-NEXT: seleqz $9, $8, $11
+; MIPSR6-NEXT: selnez $11, $5, $11
+; MIPSR6-NEXT: or $9, $9, $11
+; MIPSR6-NEXT: and $9, $9, $4
+; MIPSR6-NEXT: and $10, $8, $6
+; MIPSR6-NEXT: or $10, $10, $9
+; MIPSR6-NEXT: sc $10, 0($2)
+; MIPSR6-NEXT: beqzc $10, $BB4_1
; MIPSR6-NEXT: # %bb.2: # %entry
-; MIPSR6-NEXT: and $1, $2, $8
-; MIPSR6-NEXT: srlv $1, $1, $10
-; MIPSR6-NEXT: seh $1, $1
+; MIPSR6-NEXT: and $7, $8, $4
+; MIPSR6-NEXT: srlv $7, $7, $3
+; MIPSR6-NEXT: seh $7, $7
; MIPSR6-NEXT: # %bb.3: # %entry
-; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSR6-NEXT: # %bb.4: # %entry
-; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: sync
+; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: addiu $sp, $sp, 8
; MIPSR6-NEXT: jrc $ra
;
@@ -908,37 +908,37 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MM: # %bb.0: # %entry
; MM-NEXT: addiu $sp, $sp, -8
; MM-NEXT: .cfi_def_cfa_offset 8
-; MM-NEXT: # kill: def $at killed $a1
+; MM-NEXT: move $1, $5
; MM-NEXT: sync
-; MM-NEXT: addiu $1, $zero, -4
-; MM-NEXT: and $6, $4, $1
-; MM-NEXT: andi $1, $4, 3
-; MM-NEXT: xori $1, $1, 2
-; MM-NEXT: sll $10, $1, 3
-; MM-NEXT: ori $1, $zero, 65535
-; MM-NEXT: sllv $8, $1, $10
-; MM-NEXT: nor $9, $zero, $8
-; MM-NEXT: sllv $7, $5, $10
+; MM-NEXT: addiu $2, $zero, -4
+; MM-NEXT: and $2, $4, $2
+; MM-NEXT: andi $3, $4, 3
+; MM-NEXT: xori $3, $3, 2
+; MM-NEXT: sll $3, $3, 3
+; MM-NEXT: ori $4, $zero, 65535
+; MM-NEXT: sllv $4, $4, $3
+; MM-NEXT: nor $6, $zero, $4
+; MM-NEXT: sllv $5, $5, $3
; MM-NEXT: $BB4_1: # %entry
; MM-NEXT: # =>This Inner Loop Header: Depth=1
-; MM-NEXT: ll $2, 0($6)
-; MM-NEXT: slt $5, $2, $7
-; MM-NEXT: or $3, $2, $zero
-; MM-NEXT: movn $3, $7, $5
-; MM-NEXT: and $3, $3, $8
-; MM-NEXT: and $4, $2, $9
-; MM-NEXT: or $4, $4, $3
-; MM-NEXT: sc $4, 0($6)
-; MM-NEXT: beqzc $4, $BB4_1
+; MM-NEXT: ll $8, 0($2)
+; MM-NEXT: slt $11, $8, $5
+; MM-NEXT: or $9, $8, $zero
+; MM-NEXT: movn $9, $5, $11
+; MM-NEXT: and $9, $9, $4
+; MM-NEXT: and $10, $8, $6
+; MM-NEXT: or $10, $10, $9
+; MM-NEXT: sc $10, 0($2)
+; MM-NEXT: beqzc $10, $BB4_1
; MM-NEXT: # %bb.2: # %entry
-; MM-NEXT: and $1, $2, $8
-; MM-NEXT: srlv $1, $1, $10
-; MM-NEXT: seh $1, $1
+; MM-NEXT: and $7, $8, $4
+; MM-NEXT: srlv $7, $7, $3
+; MM-NEXT: seh $7, $7
; MM-NEXT: # %bb.3: # %entry
-; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MM-NEXT: # %bb.4: # %entry
-; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: sync
+; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: addiusp 8
; MM-NEXT: jrc $ra
;
@@ -946,38 +946,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: addiu $sp, $sp, -8
; MMR6-NEXT: .cfi_def_cfa_offset 8
-; MMR6-NEXT: # kill: def $at killed $a1
+; MMR6-NEXT: move $1, $5
; MMR6-NEXT: sync
-; MMR6-NEXT: addiu $1, $zero, -4
-; MMR6-NEXT: and $6, $4, $1
-; MMR6-NEXT: andi $1, $4, 3
-; MMR6-NEXT: xori $1, $1, 2
-; MMR6-NEXT: sll $10, $1, 3
-; MMR6-NEXT: ori $1, $zero, 65535
-; MMR6-NEXT: sllv $8, $1, $10
-; MMR6-NEXT: nor $9, $zero, $8
-; MMR6-NEXT: sllv $7, $5, $10
+; MMR6-NEXT: addiu $2, $zero, -4
+; MMR6-NEXT: and $2, $4, $2
+; MMR6-NEXT: andi $3, $4, 3
+; MMR6-NEXT: xori $3, $3, 2
+; MMR6-NEXT: sll $3, $3, 3
+; MMR6-NEXT: ori $4, $zero, 65535
+; MMR6-NEXT: sllv $4, $4, $3
+; MMR6-NEXT: nor $6, $zero, $4
+; MMR6-NEXT: sllv $5, $5, $3
; MMR6-NEXT: $BB4_1: # %entry
; MMR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMR6-NEXT: ll $2, 0($6)
-; MMR6-NEXT: slt $5, $2, $7
-; MMR6-NEXT: seleqz $3, $2, $5
-; MMR6-NEXT: selnez $5, $7, $5
-; MMR6-NEXT: or $3, $3, $5
-; MMR6-NEXT: and $3, $3, $8
-; MMR6-NEXT: and $4, $2, $9
-; MMR6-NEXT: or $4, $4, $3
-; MMR6-NEXT: sc $4, 0($6)
-; MMR6-NEXT: beqc $4, $zero, $BB4_1
+; MMR6-NEXT: ll $8, 0($2)
+; MMR6-NEXT: slt $11, $8, $5
+; MMR6-NEXT: seleqz $9, $8, $11
+; MMR6-NEXT: selnez $11, $5, $11
+; MMR6-NEXT: or $9, $9, $11
+; MMR6-NEXT: and $9, $9, $4
+; MMR6-NEXT: and $10, $8, $6
+; MMR6-NEXT: or $10, $10, $9
+; MMR6-NEXT: sc $10, 0($2)
+; MMR6-NEXT: beqc $10, $zero, $BB4_1
; MMR6-NEXT: # %bb.2: # %entry
-; MMR6-NEXT: and $1, $2, $8
-; MMR6-NEXT: srlv $1, $1, $10
-; MMR6-NEXT: seh $1, $1
+; MMR6-NEXT: and $7, $8, $4
+; MMR6-NEXT: srlv $7, $7, $3
+; MMR6-NEXT: seh $7, $7
; MMR6-NEXT: # %bb.3: # %entry
-; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMR6-NEXT: # %bb.4: # %entry
-; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: sync
+; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: addiu $sp, $sp, 8
; MMR6-NEXT: jrc $ra
;
@@ -985,39 +985,39 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSEL: # %bb.0: # %entry
; MIPSEL-NEXT: addiu $sp, $sp, -8
; MIPSEL-NEXT: .cfi_def_cfa_offset 8
-; MIPSEL-NEXT: # kill: def $at killed $a1
+; MIPSEL-NEXT: move $1, $5
; MIPSEL-NEXT: sync
-; MIPSEL-NEXT: addiu $1, $zero, -4
-; MIPSEL-NEXT: and $6, $4, $1
-; MIPSEL-NEXT: andi $1, $4, 3
-; MIPSEL-NEXT: sll $10, $1, 3
-; MIPSEL-NEXT: ori $1, $zero, 65535
-; MIPSEL-NEXT: sllv $8, $1, $10
-; MIPSEL-NEXT: nor $9, $zero, $8
-; MIPSEL-NEXT: sllv $7, $5, $10
+; MIPSEL-NEXT: addiu $2, $zero, -4
+; MIPSEL-NEXT: and $2, $4, $2
+; MIPSEL-NEXT: andi $3, $4, 3
+; MIPSEL-NEXT: sll $3, $3, 3
+; MIPSEL-NEXT: ori $4, $zero, 65535
+; MIPSEL-NEXT: sllv $4, $4, $3
+; MIPSEL-NEXT: nor $6, $zero, $4
+; MIPSEL-NEXT: sllv $5, $5, $3
; MIPSEL-NEXT: $BB4_1: # %entry
; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSEL-NEXT: ll $2, 0($6)
-; MIPSEL-NEXT: and $2, $2, $8
-; MIPSEL-NEXT: and $7, $7, $8
-; MIPSEL-NEXT: slt $5, $2, $7
-; MIPSEL-NEXT: move $3, $2
-; MIPSEL-NEXT: movn $3, $7, $5
-; MIPSEL-NEXT: and $3, $3, $8
-; MIPSEL-NEXT: and $4, $2, $9
-; MIPSEL-NEXT: or $4, $4, $3
-; MIPSEL-NEXT: sc $4, 0($6)
-; MIPSEL-NEXT: beqz $4, $BB4_1
+; MIPSEL-NEXT: ll $8, 0($2)
+; MIPSEL-NEXT: and $8, $8, $4
+; MIPSEL-NEXT: and $5, $5, $4
+; MIPSEL-NEXT: slt $11, $8, $5
+; MIPSEL-NEXT: move $9, $8
+; MIPSEL-NEXT: movn $9, $5, $11
+; MIPSEL-NEXT: and $9, $9, $4
+; MIPSEL-NEXT: and $10, $8, $6
+; MIPSEL-NEXT: or $10, $10, $9
+; MIPSEL-NEXT: sc $10, 0($2)
+; MIPSEL-NEXT: beqz $10, $BB4_1
; MIPSEL-NEXT: nop
; MIPSEL-NEXT: # %bb.2: # %entry
-; MIPSEL-NEXT: and $1, $2, $8
-; MIPSEL-NEXT: srlv $1, $1, $10
-; MIPSEL-NEXT: seh $1, $1
+; MIPSEL-NEXT: and $7, $8, $4
+; MIPSEL-NEXT: srlv $7, $7, $3
+; MIPSEL-NEXT: seh $7, $7
; MIPSEL-NEXT: # %bb.3: # %entry
-; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSEL-NEXT: # %bb.4: # %entry
-; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: sync
+; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: addiu $sp, $sp, 8
; MIPSEL-NEXT: jr $ra
; MIPSEL-NEXT: nop
@@ -1026,39 +1026,39 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSELR6: # %bb.0: # %entry
; MIPSELR6-NEXT: addiu $sp, $sp, -8
; MIPSELR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSELR6-NEXT: # kill: def $at killed $a1
+; MIPSELR6-NEXT: move $1, $5
; MIPSELR6-NEXT: sync
-; MIPSELR6-NEXT: addiu $1, $zero, -4
-; MIPSELR6-NEXT: and $6, $4, $1
-; MIPSELR6-NEXT: andi $1, $4, 3
-; MIPSELR6-NEXT: sll $10, $1, 3
-; MIPSELR6-NEXT: ori $1, $zero, 65535
-; MIPSELR6-NEXT: sllv $8, $1, $10
-; MIPSELR6-NEXT: nor $9, $zero, $8
-; MIPSELR6-NEXT: sllv $7, $5, $10
+; MIPSELR6-NEXT: addiu $2, $zero, -4
+; MIPSELR6-NEXT: and $2, $4, $2
+; MIPSELR6-NEXT: andi $3, $4, 3
+; MIPSELR6-NEXT: sll $3, $3, 3
+; MIPSELR6-NEXT: ori $4, $zero, 65535
+; MIPSELR6-NEXT: sllv $4, $4, $3
+; MIPSELR6-NEXT: nor $6, $zero, $4
+; MIPSELR6-NEXT: sllv $5, $5, $3
; MIPSELR6-NEXT: $BB4_1: # %entry
; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSELR6-NEXT: ll $2, 0($6)
-; MIPSELR6-NEXT: and $2, $2, $8
-; MIPSELR6-NEXT: and $7, $7, $8
-; MIPSELR6-NEXT: slt $5, $2, $7
-; MIPSELR6-NEXT: seleqz $3, $2, $5
-; MIPSELR6-NEXT: selnez $5, $7, $5
-; MIPSELR6-NEXT: or $3, $3, $5
-; MIPSELR6-NEXT: and $3, $3, $8
-; MIPSELR6-NEXT: and $4, $2, $9
-; MIPSELR6-NEXT: or $4, $4, $3
-; MIPSELR6-NEXT: sc $4, 0($6)
-; MIPSELR6-NEXT: beqzc $4, $BB4_1
+; MIPSELR6-NEXT: ll $8, 0($2)
+; MIPSELR6-NEXT: and $8, $8, $4
+; MIPSELR6-NEXT: and $5, $5, $4
+; MIPSELR6-NEXT: slt $11, $8, $5
+; MIPSELR6-NEXT: seleqz $9, $8, $11
+; MIPSELR6-NEXT: selnez $11, $5, $11
+; MIPSELR6-NEXT: or $9, $9, $11
+; MIPSELR6-NEXT: and $9, $9, $4
+; MIPSELR6-NEXT: and $10, $8, $6
+; MIPSELR6-NEXT: or $10, $10, $9
+; MIPSELR6-NEXT: sc $10, 0($2)
+; MIPSELR6-NEXT: beqzc $10, $BB4_1
; MIPSELR6-NEXT: # %bb.2: # %entry
-; MIPSELR6-NEXT: and $1, $2, $8
-; MIPSELR6-NEXT: srlv $1, $1, $10
-; MIPSELR6-NEXT: seh $1, $1
+; MIPSELR6-NEXT: and $7, $8, $4
+; MIPSELR6-NEXT: srlv $7, $7, $3
+; MIPSELR6-NEXT: seh $7, $7
; MIPSELR6-NEXT: # %bb.3: # %entry
-; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSELR6-NEXT: # %bb.4: # %entry
-; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: sync
+; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: addiu $sp, $sp, 8
; MIPSELR6-NEXT: jrc $ra
;
@@ -1066,38 +1066,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MMEL: # %bb.0: # %entry
; MMEL-NEXT: addiu $sp, $sp, -8
; MMEL-NEXT: .cfi_def_cfa_offset 8
-; MMEL-NEXT: # kill: def $at killed $a1
+; MMEL-NEXT: move $1, $5
; MMEL-NEXT: sync
-; MMEL-NEXT: addiu $1, $zero, -4
-; MMEL-NEXT: and $6, $4, $1
-; MMEL-NEXT: andi $1, $4, 3
-; MMEL-NEXT: sll $10, $1, 3
-; MMEL-NEXT: ori $1, $zero, 65535
-; MMEL-NEXT: sllv $8, $1, $10
-; MMEL-NEXT: nor $9, $zero, $8
-; MMEL-NEXT: sllv $7, $5, $10
+; MMEL-NEXT: addiu $2, $zero, -4
+; MMEL-NEXT: and $2, $4, $2
+; MMEL-NEXT: andi $3, $4, 3
+; MMEL-NEXT: sll $3, $3, 3
+; MMEL-NEXT: ori $4, $zero, 65535
+; MMEL-NEXT: sllv $4, $4, $3
+; MMEL-NEXT: nor $6, $zero, $4
+; MMEL-NEXT: sllv $5, $5, $3
; MMEL-NEXT: $BB4_1: # %entry
; MMEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MMEL-NEXT: ll $2, 0($6)
-; MMEL-NEXT: and $2, $2, $8
-; MMEL-NEXT: and $7, $7, $8
-; MMEL-NEXT: slt $5, $2, $7
-; MMEL-NEXT: or $3, $2, $zero
-; MMEL-NEXT: movn $3, $7, $5
-; MMEL-NEXT: and $3, $3, $8
-; MMEL-NEXT: and $4, $2, $9
-; MMEL-NEXT: or $4, $4, $3
-; MMEL-NEXT: sc $4, 0($6)
-; MMEL-NEXT: beqzc $4, $BB4_1
+; MMEL-NEXT: ll $8, 0($2)
+; MMEL-NEXT: and $8, $8, $4
+; MMEL-NEXT: and $5, $5, $4
+; MMEL-NEXT: slt $11, $8, $5
+; MMEL-NEXT: or $9, $8, $zero
+; MMEL-NEXT: movn $9, $5, $11
+; MMEL-NEXT: and $9, $9, $4
+; MMEL-NEXT: and $10, $8, $6
+; MMEL-NEXT: or $10, $10, $9
+; MMEL-NEXT: sc $10, 0($2)
+; MMEL-NEXT: beqzc $10, $BB4_1
; MMEL-NEXT: # %bb.2: # %entry
-; MMEL-NEXT: and $1, $2, $8
-; MMEL-NEXT: srlv $1, $1, $10
-; MMEL-NEXT: seh $1, $1
+; MMEL-NEXT: and $7, $8, $4
+; MMEL-NEXT: srlv $7, $7, $3
+; MMEL-NEXT: seh $7, $7
; MMEL-NEXT: # %bb.3: # %entry
-; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMEL-NEXT: # %bb.4: # %entry
-; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: sync
+; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: addiusp 8
; MMEL-NEXT: jrc $ra
;
@@ -1105,39 +1105,39 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MMELR6: # %bb.0: # %entry
; MMELR6-NEXT: addiu $sp, $sp, -8
; MMELR6-NEXT: .cfi_def_cfa_offset 8
-; MMELR6-NEXT: # kill: def $at killed $a1
+; MMELR6-NEXT: move $1, $5
; MMELR6-NEXT: sync
-; MMELR6-NEXT: addiu $1, $zero, -4
-; MMELR6-NEXT: and $6, $4, $1
-; MMELR6-NEXT: andi $1, $4, 3
-; MMELR6-NEXT: sll $10, $1, 3
-; MMELR6-NEXT: ori $1, $zero, 65535
-; MMELR6-NEXT: sllv $8, $1, $10
-; MMELR6-NEXT: nor $9, $zero, $8
-; MMELR6-NEXT: sllv $7, $5, $10
+; MMELR6-NEXT: addiu $2, $zero, -4
+; MMELR6-NEXT: and $2, $4, $2
+; MMELR6-NEXT: andi $3, $4, 3
+; MMELR6-NEXT: sll $3, $3, 3
+; MMELR6-NEXT: ori $4, $zero, 65535
+; MMELR6-NEXT: sllv $4, $4, $3
+; MMELR6-NEXT: nor $6, $zero, $4
+; MMELR6-NEXT: sllv $5, $5, $3
; MMELR6-NEXT: $BB4_1: # %entry
; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMELR6-NEXT: ll $2, 0($6)
-; MMELR6-NEXT: and $2, $2, $8
-; MMELR6-NEXT: and $7, $7, $8
-; MMELR6-NEXT: slt $5, $2, $7
-; MMELR6-NEXT: seleqz $3, $2, $5
-; MMELR6-NEXT: selnez $5, $7, $5
-; MMELR6-NEXT: or $3, $3, $5
-; MMELR6-NEXT: and $3, $3, $8
-; MMELR6-NEXT: and $4, $2, $9
-; MMELR6-NEXT: or $4, $4, $3
-; MMELR6-NEXT: sc $4, 0($6)
-; MMELR6-NEXT: beqc $4, $zero, $BB4_1
+; MMELR6-NEXT: ll $8, 0($2)
+; MMELR6-NEXT: and $8, $8, $4
+; MMELR6-NEXT: and $5, $5, $4
+; MMELR6-NEXT: slt $11, $8, $5
+; MMELR6-NEXT: seleqz $9, $8, $11
+; MMELR6-NEXT: selnez $11, $5, $11
+; MMELR6-NEXT: or $9, $9, $11
+; MMELR6-NEXT: and $9, $9, $4
+; MMELR6-NEXT: and $10, $8, $6
+; MMELR6-NEXT: or $10, $10, $9
+; MMELR6-NEXT: sc $10, 0($2)
+; MMELR6-NEXT: beqc $10, $zero, $BB4_1
; MMELR6-NEXT: # %bb.2: # %entry
-; MMELR6-NEXT: and $1, $2, $8
-; MMELR6-NEXT: srlv $1, $1, $10
-; MMELR6-NEXT: seh $1, $1
+; MMELR6-NEXT: and $7, $8, $4
+; MMELR6-NEXT: srlv $7, $7, $3
+; MMELR6-NEXT: seh $7, $7
; MMELR6-NEXT: # %bb.3: # %entry
-; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMELR6-NEXT: # %bb.4: # %entry
-; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: sync
+; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: addiu $sp, $sp, 8
; MMELR6-NEXT: jrc $ra
;
@@ -1145,38 +1145,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $sp, $sp, -16
; MIPS64-NEXT: .cfi_def_cfa_offset 16
-; MIPS64-NEXT: move $1, $5
+; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64-NEXT: sync
-; MIPS64-NEXT: daddiu $2, $zero, -4
-; MIPS64-NEXT: and $6, $4, $2
+; MIPS64-NEXT: daddiu $1, $zero, -4
+; MIPS64-NEXT: and $1, $4, $1
; MIPS64-NEXT: andi $2, $4, 3
; MIPS64-NEXT: xori $2, $2, 2
-; MIPS64-NEXT: sll $10, $2, 3
-; MIPS64-NEXT: ori $2, $zero, 65535
-; MIPS64-NEXT: sllv $8, $2, $10
-; MIPS64-NEXT: nor $9, $zero, $8
-; MIPS64-NEXT: sllv $7, $1, $10
+; MIPS64-NEXT: sll $2, $2, 3
+; MIPS64-NEXT: ori $3, $zero, 65535
+; MIPS64-NEXT: sllv $3, $3, $2
+; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB4_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $2, 0($6)
-; MIPS64-NEXT: slt $5, $2, $7
-; MIPS64-NEXT: move $3, $2
-; MIPS64-NEXT: movn $3, $7, $5
-; MIPS64-NEXT: and $3, $3, $8
-; MIPS64-NEXT: and $4, $2, $9
-; MIPS64-NEXT: or $4, $4, $3
-; MIPS64-NEXT: sc $4, 0($6)
-; MIPS64-NEXT: beqz $4, .LBB4_1
+; MIPS64-NEXT: ll $7, 0($1)
+; MIPS64-NEXT: slt $10, $7, $5
+; MIPS64-NEXT: move $8, $7
+; MIPS64-NEXT: movn $8, $5, $10
+; MIPS64-NEXT: and $8, $8, $3
+; MIPS64-NEXT: and $9, $7, $4
+; MIPS64-NEXT: or $9, $9, $8
+; MIPS64-NEXT: sc $9, 0($1)
+; MIPS64-NEXT: beqz $9, .LBB4_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $1, $2, $8
-; MIPS64-NEXT: srlv $1, $1, $10
-; MIPS64-NEXT: seh $1, $1
+; MIPS64-NEXT: and $6, $7, $3
+; MIPS64-NEXT: srlv $6, $6, $2
+; MIPS64-NEXT: seh $6, $6
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
-; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sync
+; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: daddiu $sp, $sp, 16
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
@@ -1185,38 +1185,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R6-NEXT: move $1, $5
+; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6-NEXT: sync
-; MIPS64R6-NEXT: daddiu $2, $zero, -4
-; MIPS64R6-NEXT: and $6, $4, $2
+; MIPS64R6-NEXT: daddiu $1, $zero, -4
+; MIPS64R6-NEXT: and $1, $4, $1
; MIPS64R6-NEXT: andi $2, $4, 3
; MIPS64R6-NEXT: xori $2, $2, 2
-; MIPS64R6-NEXT: sll $10, $2, 3
-; MIPS64R6-NEXT: ori $2, $zero, 65535
-; MIPS64R6-NEXT: sllv $8, $2, $10
-; MIPS64R6-NEXT: nor $9, $zero, $8
-; MIPS64R6-NEXT: sllv $7, $1, $10
+; MIPS64R6-NEXT: sll $2, $2, 3
+; MIPS64R6-NEXT: ori $3, $zero, 65535
+; MIPS64R6-NEXT: sllv $3, $3, $2
+; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB4_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $2, 0($6)
-; MIPS64R6-NEXT: slt $5, $2, $7
-; MIPS64R6-NEXT: seleqz $3, $2, $5
-; MIPS64R6-NEXT: selnez $5, $7, $5
-; MIPS64R6-NEXT: or $3, $3, $5
-; MIPS64R6-NEXT: and $3, $3, $8
-; MIPS64R6-NEXT: and $4, $2, $9
-; MIPS64R6-NEXT: or $4, $4, $3
-; MIPS64R6-NEXT: sc $4, 0($6)
-; MIPS64R6-NEXT: beqzc $4, .LBB4_1
+; MIPS64R6-NEXT: ll $7, 0($1)
+; MIPS64R6-NEXT: slt $10, $7, $5
+; MIPS64R6-NEXT: seleqz $8, $7, $10
+; MIPS64R6-NEXT: selnez $10, $5, $10
+; MIPS64R6-NEXT: or $8, $8, $10
+; MIPS64R6-NEXT: and $8, $8, $3
+; MIPS64R6-NEXT: and $9, $7, $4
+; MIPS64R6-NEXT: or $9, $9, $8
+; MIPS64R6-NEXT: sc $9, 0($1)
+; MIPS64R6-NEXT: beqzc $9, .LBB4_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $1, $2, $8
-; MIPS64R6-NEXT: srlv $1, $1, $10
-; MIPS64R6-NEXT: seh $1, $1
+; MIPS64R6-NEXT: and $6, $7, $3
+; MIPS64R6-NEXT: srlv $6, $6, $2
+; MIPS64R6-NEXT: seh $6, $6
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
-; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sync
+; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: daddiu $sp, $sp, 16
; MIPS64R6-NEXT: jrc $ra
;
@@ -1224,39 +1224,39 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64EL: # %bb.0: # %entry
; MIPS64EL-NEXT: daddiu $sp, $sp, -16
; MIPS64EL-NEXT: .cfi_def_cfa_offset 16
-; MIPS64EL-NEXT: move $1, $5
+; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64EL-NEXT: sync
-; MIPS64EL-NEXT: daddiu $2, $zero, -4
-; MIPS64EL-NEXT: and $6, $4, $2
+; MIPS64EL-NEXT: daddiu $1, $zero, -4
+; MIPS64EL-NEXT: and $1, $4, $1
; MIPS64EL-NEXT: andi $2, $4, 3
-; MIPS64EL-NEXT: sll $10, $2, 3
-; MIPS64EL-NEXT: ori $2, $zero, 65535
-; MIPS64EL-NEXT: sllv $8, $2, $10
-; MIPS64EL-NEXT: nor $9, $zero, $8
-; MIPS64EL-NEXT: sllv $7, $1, $10
+; MIPS64EL-NEXT: sll $2, $2, 3
+; MIPS64EL-NEXT: ori $3, $zero, 65535
+; MIPS64EL-NEXT: sllv $3, $3, $2
+; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB4_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $2, 0($6)
-; MIPS64EL-NEXT: and $2, $2, $8
-; MIPS64EL-NEXT: and $7, $7, $8
-; MIPS64EL-NEXT: slt $5, $2, $7
-; MIPS64EL-NEXT: move $3, $2
-; MIPS64EL-NEXT: movn $3, $7, $5
-; MIPS64EL-NEXT: and $3, $3, $8
-; MIPS64EL-NEXT: and $4, $2, $9
-; MIPS64EL-NEXT: or $4, $4, $3
-; MIPS64EL-NEXT: sc $4, 0($6)
-; MIPS64EL-NEXT: beqz $4, .LBB4_1
+; MIPS64EL-NEXT: ll $7, 0($1)
+; MIPS64EL-NEXT: and $7, $7, $3
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $10, $7, $5
+; MIPS64EL-NEXT: move $8, $7
+; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: and $8, $8, $3
+; MIPS64EL-NEXT: and $9, $7, $4
+; MIPS64EL-NEXT: or $9, $9, $8
+; MIPS64EL-NEXT: sc $9, 0($1)
+; MIPS64EL-NEXT: beqz $9, .LBB4_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $1, $2, $8
-; MIPS64EL-NEXT: srlv $1, $1, $10
-; MIPS64EL-NEXT: seh $1, $1
+; MIPS64EL-NEXT: and $6, $7, $3
+; MIPS64EL-NEXT: srlv $6, $6, $2
+; MIPS64EL-NEXT: seh $6, $6
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
-; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sync
+; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
@@ -1265,39 +1265,39 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64ELR6: # %bb.0: # %entry
; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16
; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64ELR6-NEXT: move $1, $5
+; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64ELR6-NEXT: sync
-; MIPS64ELR6-NEXT: daddiu $2, $zero, -4
-; MIPS64ELR6-NEXT: and $6, $4, $2
+; MIPS64ELR6-NEXT: daddiu $1, $zero, -4
+; MIPS64ELR6-NEXT: and $1, $4, $1
; MIPS64ELR6-NEXT: andi $2, $4, 3
-; MIPS64ELR6-NEXT: sll $10, $2, 3
-; MIPS64ELR6-NEXT: ori $2, $zero, 65535
-; MIPS64ELR6-NEXT: sllv $8, $2, $10
-; MIPS64ELR6-NEXT: nor $9, $zero, $8
-; MIPS64ELR6-NEXT: sllv $7, $1, $10
+; MIPS64ELR6-NEXT: sll $2, $2, 3
+; MIPS64ELR6-NEXT: ori $3, $zero, 65535
+; MIPS64ELR6-NEXT: sllv $3, $3, $2
+; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB4_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $2, 0($6)
-; MIPS64ELR6-NEXT: and $2, $2, $8
-; MIPS64ELR6-NEXT: and $7, $7, $8
-; MIPS64ELR6-NEXT: slt $5, $2, $7
-; MIPS64ELR6-NEXT: seleqz $3, $2, $5
-; MIPS64ELR6-NEXT: selnez $5, $7, $5
-; MIPS64ELR6-NEXT: or $3, $3, $5
-; MIPS64ELR6-NEXT: and $3, $3, $8
-; MIPS64ELR6-NEXT: and $4, $2, $9
-; MIPS64ELR6-NEXT: or $4, $4, $3
-; MIPS64ELR6-NEXT: sc $4, 0($6)
-; MIPS64ELR6-NEXT: beqzc $4, .LBB4_1
+; MIPS64ELR6-NEXT: ll $7, 0($1)
+; MIPS64ELR6-NEXT: and $7, $7, $3
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $10, $7, $5
+; MIPS64ELR6-NEXT: seleqz $8, $7, $10
+; MIPS64ELR6-NEXT: selnez $10, $5, $10
+; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: and $8, $8, $3
+; MIPS64ELR6-NEXT: and $9, $7, $4
+; MIPS64ELR6-NEXT: or $9, $9, $8
+; MIPS64ELR6-NEXT: sc $9, 0($1)
+; MIPS64ELR6-NEXT: beqzc $9, .LBB4_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $1, $2, $8
-; MIPS64ELR6-NEXT: srlv $1, $1, $10
-; MIPS64ELR6-NEXT: seh $1, $1
+; MIPS64ELR6-NEXT: and $6, $7, $3
+; MIPS64ELR6-NEXT: srlv $6, $6, $2
+; MIPS64ELR6-NEXT: seh $6, $6
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
-; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sync
+; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16
; MIPS64ELR6-NEXT: jrc $ra
entry:
@@ -1310,38 +1310,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: addiu $sp, $sp, -8
; MIPS-NEXT: .cfi_def_cfa_offset 8
-; MIPS-NEXT: # kill: def $at killed $a1
+; MIPS-NEXT: move $1, $5
; MIPS-NEXT: sync
-; MIPS-NEXT: addiu $1, $zero, -4
-; MIPS-NEXT: and $6, $4, $1
-; MIPS-NEXT: andi $1, $4, 3
-; MIPS-NEXT: xori $1, $1, 2
-; MIPS-NEXT: sll $10, $1, 3
-; MIPS-NEXT: ori $1, $zero, 65535
-; MIPS-NEXT: sllv $8, $1, $10
-; MIPS-NEXT: nor $9, $zero, $8
-; MIPS-NEXT: sllv $7, $5, $10
+; MIPS-NEXT: addiu $2, $zero, -4
+; MIPS-NEXT: and $2, $4, $2
+; MIPS-NEXT: andi $3, $4, 3
+; MIPS-NEXT: xori $3, $3, 2
+; MIPS-NEXT: sll $3, $3, 3
+; MIPS-NEXT: ori $4, $zero, 65535
+; MIPS-NEXT: sllv $4, $4, $3
+; MIPS-NEXT: nor $6, $zero, $4
+; MIPS-NEXT: sllv $5, $5, $3
; MIPS-NEXT: $BB5_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS-NEXT: ll $2, 0($6)
-; MIPS-NEXT: slt $5, $2, $7
-; MIPS-NEXT: move $3, $2
-; MIPS-NEXT: movz $3, $7, $5
-; MIPS-NEXT: and $3, $3, $8
-; MIPS-NEXT: and $4, $2, $9
-; MIPS-NEXT: or $4, $4, $3
-; MIPS-NEXT: sc $4, 0($6)
-; MIPS-NEXT: beqz $4, $BB5_1
+; MIPS-NEXT: ll $8, 0($2)
+; MIPS-NEXT: slt $11, $8, $5
+; MIPS-NEXT: move $9, $8
+; MIPS-NEXT: movz $9, $5, $11
+; MIPS-NEXT: and $9, $9, $4
+; MIPS-NEXT: and $10, $8, $6
+; MIPS-NEXT: or $10, $10, $9
+; MIPS-NEXT: sc $10, 0($2)
+; MIPS-NEXT: beqz $10, $BB5_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
-; MIPS-NEXT: and $1, $2, $8
-; MIPS-NEXT: srlv $1, $1, $10
-; MIPS-NEXT: seh $1, $1
+; MIPS-NEXT: and $7, $8, $4
+; MIPS-NEXT: srlv $7, $7, $3
+; MIPS-NEXT: seh $7, $7
; MIPS-NEXT: # %bb.3: # %entry
-; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS-NEXT: # %bb.4: # %entry
-; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: sync
+; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: addiu $sp, $sp, 8
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
@@ -1350,38 +1350,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: addiu $sp, $sp, -8
; MIPSR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSR6-NEXT: # kill: def $at killed $a1
+; MIPSR6-NEXT: move $1, $5
; MIPSR6-NEXT: sync
-; MIPSR6-NEXT: addiu $1, $zero, -4
-; MIPSR6-NEXT: and $6, $4, $1
-; MIPSR6-NEXT: andi $1, $4, 3
-; MIPSR6-NEXT: xori $1, $1, 2
-; MIPSR6-NEXT: sll $10, $1, 3
-; MIPSR6-NEXT: ori $1, $zero, 65535
-; MIPSR6-NEXT: sllv $8, $1, $10
-; MIPSR6-NEXT: nor $9, $zero, $8
-; MIPSR6-NEXT: sllv $7, $5, $10
+; MIPSR6-NEXT: addiu $2, $zero, -4
+; MIPSR6-NEXT: and $2, $4, $2
+; MIPSR6-NEXT: andi $3, $4, 3
+; MIPSR6-NEXT: xori $3, $3, 2
+; MIPSR6-NEXT: sll $3, $3, 3
+; MIPSR6-NEXT: ori $4, $zero, 65535
+; MIPSR6-NEXT: sllv $4, $4, $3
+; MIPSR6-NEXT: nor $6, $zero, $4
+; MIPSR6-NEXT: sllv $5, $5, $3
; MIPSR6-NEXT: $BB5_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSR6-NEXT: ll $2, 0($6)
-; MIPSR6-NEXT: slt $5, $2, $7
-; MIPSR6-NEXT: selnez $3, $2, $5
-; MIPSR6-NEXT: seleqz $5, $7, $5
-; MIPSR6-NEXT: or $3, $3, $5
-; MIPSR6-NEXT: and $3, $3, $8
-; MIPSR6-NEXT: and $4, $2, $9
-; MIPSR6-NEXT: or $4, $4, $3
-; MIPSR6-NEXT: sc $4, 0($6)
-; MIPSR6-NEXT: beqzc $4, $BB5_1
+; MIPSR6-NEXT: ll $8, 0($2)
+; MIPSR6-NEXT: slt $11, $8, $5
+; MIPSR6-NEXT: selnez $9, $8, $11
+; MIPSR6-NEXT: seleqz $11, $5, $11
+; MIPSR6-NEXT: or $9, $9, $11
+; MIPSR6-NEXT: and $9, $9, $4
+; MIPSR6-NEXT: and $10, $8, $6
+; MIPSR6-NEXT: or $10, $10, $9
+; MIPSR6-NEXT: sc $10, 0($2)
+; MIPSR6-NEXT: beqzc $10, $BB5_1
; MIPSR6-NEXT: # %bb.2: # %entry
-; MIPSR6-NEXT: and $1, $2, $8
-; MIPSR6-NEXT: srlv $1, $1, $10
-; MIPSR6-NEXT: seh $1, $1
+; MIPSR6-NEXT: and $7, $8, $4
+; MIPSR6-NEXT: srlv $7, $7, $3
+; MIPSR6-NEXT: seh $7, $7
; MIPSR6-NEXT: # %bb.3: # %entry
-; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSR6-NEXT: # %bb.4: # %entry
-; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: sync
+; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: addiu $sp, $sp, 8
; MIPSR6-NEXT: jrc $ra
;
@@ -1389,37 +1389,37 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MM: # %bb.0: # %entry
; MM-NEXT: addiu $sp, $sp, -8
; MM-NEXT: .cfi_def_cfa_offset 8
-; MM-NEXT: # kill: def $at killed $a1
+; MM-NEXT: move $1, $5
; MM-NEXT: sync
-; MM-NEXT: addiu $1, $zero, -4
-; MM-NEXT: and $6, $4, $1
-; MM-NEXT: andi $1, $4, 3
-; MM-NEXT: xori $1, $1, 2
-; MM-NEXT: sll $10, $1, 3
-; MM-NEXT: ori $1, $zero, 65535
-; MM-NEXT: sllv $8, $1, $10
-; MM-NEXT: nor $9, $zero, $8
-; MM-NEXT: sllv $7, $5, $10
+; MM-NEXT: addiu $2, $zero, -4
+; MM-NEXT: and $2, $4, $2
+; MM-NEXT: andi $3, $4, 3
+; MM-NEXT: xori $3, $3, 2
+; MM-NEXT: sll $3, $3, 3
+; MM-NEXT: ori $4, $zero, 65535
+; MM-NEXT: sllv $4, $4, $3
+; MM-NEXT: nor $6, $zero, $4
+; MM-NEXT: sllv $5, $5, $3
; MM-NEXT: $BB5_1: # %entry
; MM-NEXT: # =>This Inner Loop Header: Depth=1
-; MM-NEXT: ll $2, 0($6)
-; MM-NEXT: slt $5, $2, $7
-; MM-NEXT: or $3, $2, $zero
-; MM-NEXT: movz $3, $7, $5
-; MM-NEXT: and $3, $3, $8
-; MM-NEXT: and $4, $2, $9
-; MM-NEXT: or $4, $4, $3
-; MM-NEXT: sc $4, 0($6)
-; MM-NEXT: beqzc $4, $BB5_1
+; MM-NEXT: ll $8, 0($2)
+; MM-NEXT: slt $11, $8, $5
+; MM-NEXT: or $9, $8, $zero
+; MM-NEXT: movz $9, $5, $11
+; MM-NEXT: and $9, $9, $4
+; MM-NEXT: and $10, $8, $6
+; MM-NEXT: or $10, $10, $9
+; MM-NEXT: sc $10, 0($2)
+; MM-NEXT: beqzc $10, $BB5_1
; MM-NEXT: # %bb.2: # %entry
-; MM-NEXT: and $1, $2, $8
-; MM-NEXT: srlv $1, $1, $10
-; MM-NEXT: seh $1, $1
+; MM-NEXT: and $7, $8, $4
+; MM-NEXT: srlv $7, $7, $3
+; MM-NEXT: seh $7, $7
; MM-NEXT: # %bb.3: # %entry
-; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MM-NEXT: # %bb.4: # %entry
-; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: sync
+; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: addiusp 8
; MM-NEXT: jrc $ra
;
@@ -1427,38 +1427,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: addiu $sp, $sp, -8
; MMR6-NEXT: .cfi_def_cfa_offset 8
-; MMR6-NEXT: # kill: def $at killed $a1
+; MMR6-NEXT: move $1, $5
; MMR6-NEXT: sync
-; MMR6-NEXT: addiu $1, $zero, -4
-; MMR6-NEXT: and $6, $4, $1
-; MMR6-NEXT: andi $1, $4, 3
-; MMR6-NEXT: xori $1, $1, 2
-; MMR6-NEXT: sll $10, $1, 3
-; MMR6-NEXT: ori $1, $zero, 65535
-; MMR6-NEXT: sllv $8, $1, $10
-; MMR6-NEXT: nor $9, $zero, $8
-; MMR6-NEXT: sllv $7, $5, $10
+; MMR6-NEXT: addiu $2, $zero, -4
+; MMR6-NEXT: and $2, $4, $2
+; MMR6-NEXT: andi $3, $4, 3
+; MMR6-NEXT: xori $3, $3, 2
+; MMR6-NEXT: sll $3, $3, 3
+; MMR6-NEXT: ori $4, $zero, 65535
+; MMR6-NEXT: sllv $4, $4, $3
+; MMR6-NEXT: nor $6, $zero, $4
+; MMR6-NEXT: sllv $5, $5, $3
; MMR6-NEXT: $BB5_1: # %entry
; MMR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMR6-NEXT: ll $2, 0($6)
-; MMR6-NEXT: slt $5, $2, $7
-; MMR6-NEXT: selnez $3, $2, $5
-; MMR6-NEXT: seleqz $5, $7, $5
-; MMR6-NEXT: or $3, $3, $5
-; MMR6-NEXT: and $3, $3, $8
-; MMR6-NEXT: and $4, $2, $9
-; MMR6-NEXT: or $4, $4, $3
-; MMR6-NEXT: sc $4, 0($6)
-; MMR6-NEXT: beqc $4, $zero, $BB5_1
+; MMR6-NEXT: ll $8, 0($2)
+; MMR6-NEXT: slt $11, $8, $5
+; MMR6-NEXT: selnez $9, $8, $11
+; MMR6-NEXT: seleqz $11, $5, $11
+; MMR6-NEXT: or $9, $9, $11
+; MMR6-NEXT: and $9, $9, $4
+; MMR6-NEXT: and $10, $8, $6
+; MMR6-NEXT: or $10, $10, $9
+; MMR6-NEXT: sc $10, 0($2)
+; MMR6-NEXT: beqc $10, $zero, $BB5_1
; MMR6-NEXT: # %bb.2: # %entry
-; MMR6-NEXT: and $1, $2, $8
-; MMR6-NEXT: srlv $1, $1, $10
-; MMR6-NEXT: seh $1, $1
+; MMR6-NEXT: and $7, $8, $4
+; MMR6-NEXT: srlv $7, $7, $3
+; MMR6-NEXT: seh $7, $7
; MMR6-NEXT: # %bb.3: # %entry
-; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMR6-NEXT: # %bb.4: # %entry
-; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: sync
+; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: addiu $sp, $sp, 8
; MMR6-NEXT: jrc $ra
;
@@ -1466,39 +1466,39 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSEL: # %bb.0: # %entry
; MIPSEL-NEXT: addiu $sp, $sp, -8
; MIPSEL-NEXT: .cfi_def_cfa_offset 8
-; MIPSEL-NEXT: # kill: def $at killed $a1
+; MIPSEL-NEXT: move $1, $5
; MIPSEL-NEXT: sync
-; MIPSEL-NEXT: addiu $1, $zero, -4
-; MIPSEL-NEXT: and $6, $4, $1
-; MIPSEL-NEXT: andi $1, $4, 3
-; MIPSEL-NEXT: sll $10, $1, 3
-; MIPSEL-NEXT: ori $1, $zero, 65535
-; MIPSEL-NEXT: sllv $8, $1, $10
-; MIPSEL-NEXT: nor $9, $zero, $8
-; MIPSEL-NEXT: sllv $7, $5, $10
+; MIPSEL-NEXT: addiu $2, $zero, -4
+; MIPSEL-NEXT: and $2, $4, $2
+; MIPSEL-NEXT: andi $3, $4, 3
+; MIPSEL-NEXT: sll $3, $3, 3
+; MIPSEL-NEXT: ori $4, $zero, 65535
+; MIPSEL-NEXT: sllv $4, $4, $3
+; MIPSEL-NEXT: nor $6, $zero, $4
+; MIPSEL-NEXT: sllv $5, $5, $3
; MIPSEL-NEXT: $BB5_1: # %entry
; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSEL-NEXT: ll $2, 0($6)
-; MIPSEL-NEXT: and $2, $2, $8
-; MIPSEL-NEXT: and $7, $7, $8
-; MIPSEL-NEXT: slt $5, $2, $7
-; MIPSEL-NEXT: move $3, $2
-; MIPSEL-NEXT: movz $3, $7, $5
-; MIPSEL-NEXT: and $3, $3, $8
-; MIPSEL-NEXT: and $4, $2, $9
-; MIPSEL-NEXT: or $4, $4, $3
-; MIPSEL-NEXT: sc $4, 0($6)
-; MIPSEL-NEXT: beqz $4, $BB5_1
+; MIPSEL-NEXT: ll $8, 0($2)
+; MIPSEL-NEXT: and $8, $8, $4
+; MIPSEL-NEXT: and $5, $5, $4
+; MIPSEL-NEXT: slt $11, $8, $5
+; MIPSEL-NEXT: move $9, $8
+; MIPSEL-NEXT: movz $9, $5, $11
+; MIPSEL-NEXT: and $9, $9, $4
+; MIPSEL-NEXT: and $10, $8, $6
+; MIPSEL-NEXT: or $10, $10, $9
+; MIPSEL-NEXT: sc $10, 0($2)
+; MIPSEL-NEXT: beqz $10, $BB5_1
; MIPSEL-NEXT: nop
; MIPSEL-NEXT: # %bb.2: # %entry
-; MIPSEL-NEXT: and $1, $2, $8
-; MIPSEL-NEXT: srlv $1, $1, $10
-; MIPSEL-NEXT: seh $1, $1
+; MIPSEL-NEXT: and $7, $8, $4
+; MIPSEL-NEXT: srlv $7, $7, $3
+; MIPSEL-NEXT: seh $7, $7
; MIPSEL-NEXT: # %bb.3: # %entry
-; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSEL-NEXT: # %bb.4: # %entry
-; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: sync
+; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: addiu $sp, $sp, 8
; MIPSEL-NEXT: jr $ra
; MIPSEL-NEXT: nop
@@ -1507,39 +1507,39 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSELR6: # %bb.0: # %entry
; MIPSELR6-NEXT: addiu $sp, $sp, -8
; MIPSELR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSELR6-NEXT: # kill: def $at killed $a1
+; MIPSELR6-NEXT: move $1, $5
; MIPSELR6-NEXT: sync
-; MIPSELR6-NEXT: addiu $1, $zero, -4
-; MIPSELR6-NEXT: and $6, $4, $1
-; MIPSELR6-NEXT: andi $1, $4, 3
-; MIPSELR6-NEXT: sll $10, $1, 3
-; MIPSELR6-NEXT: ori $1, $zero, 65535
-; MIPSELR6-NEXT: sllv $8, $1, $10
-; MIPSELR6-NEXT: nor $9, $zero, $8
-; MIPSELR6-NEXT: sllv $7, $5, $10
+; MIPSELR6-NEXT: addiu $2, $zero, -4
+; MIPSELR6-NEXT: and $2, $4, $2
+; MIPSELR6-NEXT: andi $3, $4, 3
+; MIPSELR6-NEXT: sll $3, $3, 3
+; MIPSELR6-NEXT: ori $4, $zero, 65535
+; MIPSELR6-NEXT: sllv $4, $4, $3
+; MIPSELR6-NEXT: nor $6, $zero, $4
+; MIPSELR6-NEXT: sllv $5, $5, $3
; MIPSELR6-NEXT: $BB5_1: # %entry
; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSELR6-NEXT: ll $2, 0($6)
-; MIPSELR6-NEXT: and $2, $2, $8
-; MIPSELR6-NEXT: and $7, $7, $8
-; MIPSELR6-NEXT: slt $5, $2, $7
-; MIPSELR6-NEXT: selnez $3, $2, $5
-; MIPSELR6-NEXT: seleqz $5, $7, $5
-; MIPSELR6-NEXT: or $3, $3, $5
-; MIPSELR6-NEXT: and $3, $3, $8
-; MIPSELR6-NEXT: and $4, $2, $9
-; MIPSELR6-NEXT: or $4, $4, $3
-; MIPSELR6-NEXT: sc $4, 0($6)
-; MIPSELR6-NEXT: beqzc $4, $BB5_1
+; MIPSELR6-NEXT: ll $8, 0($2)
+; MIPSELR6-NEXT: and $8, $8, $4
+; MIPSELR6-NEXT: and $5, $5, $4
+; MIPSELR6-NEXT: slt $11, $8, $5
+; MIPSELR6-NEXT: selnez $9, $8, $11
+; MIPSELR6-NEXT: seleqz $11, $5, $11
+; MIPSELR6-NEXT: or $9, $9, $11
+; MIPSELR6-NEXT: and $9, $9, $4
+; MIPSELR6-NEXT: and $10, $8, $6
+; MIPSELR6-NEXT: or $10, $10, $9
+; MIPSELR6-NEXT: sc $10, 0($2)
+; MIPSELR6-NEXT: beqzc $10, $BB5_1
; MIPSELR6-NEXT: # %bb.2: # %entry
-; MIPSELR6-NEXT: and $1, $2, $8
-; MIPSELR6-NEXT: srlv $1, $1, $10
-; MIPSELR6-NEXT: seh $1, $1
+; MIPSELR6-NEXT: and $7, $8, $4
+; MIPSELR6-NEXT: srlv $7, $7, $3
+; MIPSELR6-NEXT: seh $7, $7
; MIPSELR6-NEXT: # %bb.3: # %entry
-; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSELR6-NEXT: # %bb.4: # %entry
-; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: sync
+; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: addiu $sp, $sp, 8
; MIPSELR6-NEXT: jrc $ra
;
@@ -1547,38 +1547,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MMEL: # %bb.0: # %entry
; MMEL-NEXT: addiu $sp, $sp, -8
; MMEL-NEXT: .cfi_def_cfa_offset 8
-; MMEL-NEXT: # kill: def $at killed $a1
+; MMEL-NEXT: move $1, $5
; MMEL-NEXT: sync
-; MMEL-NEXT: addiu $1, $zero, -4
-; MMEL-NEXT: and $6, $4, $1
-; MMEL-NEXT: andi $1, $4, 3
-; MMEL-NEXT: sll $10, $1, 3
-; MMEL-NEXT: ori $1, $zero, 65535
-; MMEL-NEXT: sllv $8, $1, $10
-; MMEL-NEXT: nor $9, $zero, $8
-; MMEL-NEXT: sllv $7, $5, $10
+; MMEL-NEXT: addiu $2, $zero, -4
+; MMEL-NEXT: and $2, $4, $2
+; MMEL-NEXT: andi $3, $4, 3
+; MMEL-NEXT: sll $3, $3, 3
+; MMEL-NEXT: ori $4, $zero, 65535
+; MMEL-NEXT: sllv $4, $4, $3
+; MMEL-NEXT: nor $6, $zero, $4
+; MMEL-NEXT: sllv $5, $5, $3
; MMEL-NEXT: $BB5_1: # %entry
; MMEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MMEL-NEXT: ll $2, 0($6)
-; MMEL-NEXT: and $2, $2, $8
-; MMEL-NEXT: and $7, $7, $8
-; MMEL-NEXT: slt $5, $2, $7
-; MMEL-NEXT: or $3, $2, $zero
-; MMEL-NEXT: movz $3, $7, $5
-; MMEL-NEXT: and $3, $3, $8
-; MMEL-NEXT: and $4, $2, $9
-; MMEL-NEXT: or $4, $4, $3
-; MMEL-NEXT: sc $4, 0($6)
-; MMEL-NEXT: beqzc $4, $BB5_1
+; MMEL-NEXT: ll $8, 0($2)
+; MMEL-NEXT: and $8, $8, $4
+; MMEL-NEXT: and $5, $5, $4
+; MMEL-NEXT: slt $11, $8, $5
+; MMEL-NEXT: or $9, $8, $zero
+; MMEL-NEXT: movz $9, $5, $11
+; MMEL-NEXT: and $9, $9, $4
+; MMEL-NEXT: and $10, $8, $6
+; MMEL-NEXT: or $10, $10, $9
+; MMEL-NEXT: sc $10, 0($2)
+; MMEL-NEXT: beqzc $10, $BB5_1
; MMEL-NEXT: # %bb.2: # %entry
-; MMEL-NEXT: and $1, $2, $8
-; MMEL-NEXT: srlv $1, $1, $10
-; MMEL-NEXT: seh $1, $1
+; MMEL-NEXT: and $7, $8, $4
+; MMEL-NEXT: srlv $7, $7, $3
+; MMEL-NEXT: seh $7, $7
; MMEL-NEXT: # %bb.3: # %entry
-; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMEL-NEXT: # %bb.4: # %entry
-; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: sync
+; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: addiusp 8
; MMEL-NEXT: jrc $ra
;
@@ -1586,39 +1586,39 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MMELR6: # %bb.0: # %entry
; MMELR6-NEXT: addiu $sp, $sp, -8
; MMELR6-NEXT: .cfi_def_cfa_offset 8
-; MMELR6-NEXT: # kill: def $at killed $a1
+; MMELR6-NEXT: move $1, $5
; MMELR6-NEXT: sync
-; MMELR6-NEXT: addiu $1, $zero, -4
-; MMELR6-NEXT: and $6, $4, $1
-; MMELR6-NEXT: andi $1, $4, 3
-; MMELR6-NEXT: sll $10, $1, 3
-; MMELR6-NEXT: ori $1, $zero, 65535
-; MMELR6-NEXT: sllv $8, $1, $10
-; MMELR6-NEXT: nor $9, $zero, $8
-; MMELR6-NEXT: sllv $7, $5, $10
+; MMELR6-NEXT: addiu $2, $zero, -4
+; MMELR6-NEXT: and $2, $4, $2
+; MMELR6-NEXT: andi $3, $4, 3
+; MMELR6-NEXT: sll $3, $3, 3
+; MMELR6-NEXT: ori $4, $zero, 65535
+; MMELR6-NEXT: sllv $4, $4, $3
+; MMELR6-NEXT: nor $6, $zero, $4
+; MMELR6-NEXT: sllv $5, $5, $3
; MMELR6-NEXT: $BB5_1: # %entry
; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMELR6-NEXT: ll $2, 0($6)
-; MMELR6-NEXT: and $2, $2, $8
-; MMELR6-NEXT: and $7, $7, $8
-; MMELR6-NEXT: slt $5, $2, $7
-; MMELR6-NEXT: selnez $3, $2, $5
-; MMELR6-NEXT: seleqz $5, $7, $5
-; MMELR6-NEXT: or $3, $3, $5
-; MMELR6-NEXT: and $3, $3, $8
-; MMELR6-NEXT: and $4, $2, $9
-; MMELR6-NEXT: or $4, $4, $3
-; MMELR6-NEXT: sc $4, 0($6)
-; MMELR6-NEXT: beqc $4, $zero, $BB5_1
+; MMELR6-NEXT: ll $8, 0($2)
+; MMELR6-NEXT: and $8, $8, $4
+; MMELR6-NEXT: and $5, $5, $4
+; MMELR6-NEXT: slt $11, $8, $5
+; MMELR6-NEXT: selnez $9, $8, $11
+; MMELR6-NEXT: seleqz $11, $5, $11
+; MMELR6-NEXT: or $9, $9, $11
+; MMELR6-NEXT: and $9, $9, $4
+; MMELR6-NEXT: and $10, $8, $6
+; MMELR6-NEXT: or $10, $10, $9
+; MMELR6-NEXT: sc $10, 0($2)
+; MMELR6-NEXT: beqc $10, $zero, $BB5_1
; MMELR6-NEXT: # %bb.2: # %entry
-; MMELR6-NEXT: and $1, $2, $8
-; MMELR6-NEXT: srlv $1, $1, $10
-; MMELR6-NEXT: seh $1, $1
+; MMELR6-NEXT: and $7, $8, $4
+; MMELR6-NEXT: srlv $7, $7, $3
+; MMELR6-NEXT: seh $7, $7
; MMELR6-NEXT: # %bb.3: # %entry
-; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMELR6-NEXT: # %bb.4: # %entry
-; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: sync
+; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: addiu $sp, $sp, 8
; MMELR6-NEXT: jrc $ra
;
@@ -1626,38 +1626,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $sp, $sp, -16
; MIPS64-NEXT: .cfi_def_cfa_offset 16
-; MIPS64-NEXT: move $1, $5
+; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64-NEXT: sync
-; MIPS64-NEXT: daddiu $2, $zero, -4
-; MIPS64-NEXT: and $6, $4, $2
+; MIPS64-NEXT: daddiu $1, $zero, -4
+; MIPS64-NEXT: and $1, $4, $1
; MIPS64-NEXT: andi $2, $4, 3
; MIPS64-NEXT: xori $2, $2, 2
-; MIPS64-NEXT: sll $10, $2, 3
-; MIPS64-NEXT: ori $2, $zero, 65535
-; MIPS64-NEXT: sllv $8, $2, $10
-; MIPS64-NEXT: nor $9, $zero, $8
-; MIPS64-NEXT: sllv $7, $1, $10
+; MIPS64-NEXT: sll $2, $2, 3
+; MIPS64-NEXT: ori $3, $zero, 65535
+; MIPS64-NEXT: sllv $3, $3, $2
+; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB5_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $2, 0($6)
-; MIPS64-NEXT: slt $5, $2, $7
-; MIPS64-NEXT: move $3, $2
-; MIPS64-NEXT: movz $3, $7, $5
-; MIPS64-NEXT: and $3, $3, $8
-; MIPS64-NEXT: and $4, $2, $9
-; MIPS64-NEXT: or $4, $4, $3
-; MIPS64-NEXT: sc $4, 0($6)
-; MIPS64-NEXT: beqz $4, .LBB5_1
+; MIPS64-NEXT: ll $7, 0($1)
+; MIPS64-NEXT: slt $10, $7, $5
+; MIPS64-NEXT: move $8, $7
+; MIPS64-NEXT: movz $8, $5, $10
+; MIPS64-NEXT: and $8, $8, $3
+; MIPS64-NEXT: and $9, $7, $4
+; MIPS64-NEXT: or $9, $9, $8
+; MIPS64-NEXT: sc $9, 0($1)
+; MIPS64-NEXT: beqz $9, .LBB5_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $1, $2, $8
-; MIPS64-NEXT: srlv $1, $1, $10
-; MIPS64-NEXT: seh $1, $1
+; MIPS64-NEXT: and $6, $7, $3
+; MIPS64-NEXT: srlv $6, $6, $2
+; MIPS64-NEXT: seh $6, $6
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
-; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sync
+; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: daddiu $sp, $sp, 16
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
@@ -1666,38 +1666,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R6-NEXT: move $1, $5
+; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6-NEXT: sync
-; MIPS64R6-NEXT: daddiu $2, $zero, -4
-; MIPS64R6-NEXT: and $6, $4, $2
+; MIPS64R6-NEXT: daddiu $1, $zero, -4
+; MIPS64R6-NEXT: and $1, $4, $1
; MIPS64R6-NEXT: andi $2, $4, 3
; MIPS64R6-NEXT: xori $2, $2, 2
-; MIPS64R6-NEXT: sll $10, $2, 3
-; MIPS64R6-NEXT: ori $2, $zero, 65535
-; MIPS64R6-NEXT: sllv $8, $2, $10
-; MIPS64R6-NEXT: nor $9, $zero, $8
-; MIPS64R6-NEXT: sllv $7, $1, $10
+; MIPS64R6-NEXT: sll $2, $2, 3
+; MIPS64R6-NEXT: ori $3, $zero, 65535
+; MIPS64R6-NEXT: sllv $3, $3, $2
+; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB5_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $2, 0($6)
-; MIPS64R6-NEXT: slt $5, $2, $7
-; MIPS64R6-NEXT: selnez $3, $2, $5
-; MIPS64R6-NEXT: seleqz $5, $7, $5
-; MIPS64R6-NEXT: or $3, $3, $5
-; MIPS64R6-NEXT: and $3, $3, $8
-; MIPS64R6-NEXT: and $4, $2, $9
-; MIPS64R6-NEXT: or $4, $4, $3
-; MIPS64R6-NEXT: sc $4, 0($6)
-; MIPS64R6-NEXT: beqzc $4, .LBB5_1
+; MIPS64R6-NEXT: ll $7, 0($1)
+; MIPS64R6-NEXT: slt $10, $7, $5
+; MIPS64R6-NEXT: selnez $8, $7, $10
+; MIPS64R6-NEXT: seleqz $10, $5, $10
+; MIPS64R6-NEXT: or $8, $8, $10
+; MIPS64R6-NEXT: and $8, $8, $3
+; MIPS64R6-NEXT: and $9, $7, $4
+; MIPS64R6-NEXT: or $9, $9, $8
+; MIPS64R6-NEXT: sc $9, 0($1)
+; MIPS64R6-NEXT: beqzc $9, .LBB5_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $1, $2, $8
-; MIPS64R6-NEXT: srlv $1, $1, $10
-; MIPS64R6-NEXT: seh $1, $1
+; MIPS64R6-NEXT: and $6, $7, $3
+; MIPS64R6-NEXT: srlv $6, $6, $2
+; MIPS64R6-NEXT: seh $6, $6
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
-; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sync
+; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: daddiu $sp, $sp, 16
; MIPS64R6-NEXT: jrc $ra
;
@@ -1705,39 +1705,39 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64EL: # %bb.0: # %entry
; MIPS64EL-NEXT: daddiu $sp, $sp, -16
; MIPS64EL-NEXT: .cfi_def_cfa_offset 16
-; MIPS64EL-NEXT: move $1, $5
+; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64EL-NEXT: sync
-; MIPS64EL-NEXT: daddiu $2, $zero, -4
-; MIPS64EL-NEXT: and $6, $4, $2
+; MIPS64EL-NEXT: daddiu $1, $zero, -4
+; MIPS64EL-NEXT: and $1, $4, $1
; MIPS64EL-NEXT: andi $2, $4, 3
-; MIPS64EL-NEXT: sll $10, $2, 3
-; MIPS64EL-NEXT: ori $2, $zero, 65535
-; MIPS64EL-NEXT: sllv $8, $2, $10
-; MIPS64EL-NEXT: nor $9, $zero, $8
-; MIPS64EL-NEXT: sllv $7, $1, $10
+; MIPS64EL-NEXT: sll $2, $2, 3
+; MIPS64EL-NEXT: ori $3, $zero, 65535
+; MIPS64EL-NEXT: sllv $3, $3, $2
+; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB5_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $2, 0($6)
-; MIPS64EL-NEXT: and $2, $2, $8
-; MIPS64EL-NEXT: and $7, $7, $8
-; MIPS64EL-NEXT: slt $5, $2, $7
-; MIPS64EL-NEXT: move $3, $2
-; MIPS64EL-NEXT: movz $3, $7, $5
-; MIPS64EL-NEXT: and $3, $3, $8
-; MIPS64EL-NEXT: and $4, $2, $9
-; MIPS64EL-NEXT: or $4, $4, $3
-; MIPS64EL-NEXT: sc $4, 0($6)
-; MIPS64EL-NEXT: beqz $4, .LBB5_1
+; MIPS64EL-NEXT: ll $7, 0($1)
+; MIPS64EL-NEXT: and $7, $7, $3
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $10, $7, $5
+; MIPS64EL-NEXT: move $8, $7
+; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: and $8, $8, $3
+; MIPS64EL-NEXT: and $9, $7, $4
+; MIPS64EL-NEXT: or $9, $9, $8
+; MIPS64EL-NEXT: sc $9, 0($1)
+; MIPS64EL-NEXT: beqz $9, .LBB5_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $1, $2, $8
-; MIPS64EL-NEXT: srlv $1, $1, $10
-; MIPS64EL-NEXT: seh $1, $1
+; MIPS64EL-NEXT: and $6, $7, $3
+; MIPS64EL-NEXT: srlv $6, $6, $2
+; MIPS64EL-NEXT: seh $6, $6
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
-; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sync
+; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
@@ -1746,39 +1746,39 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64ELR6: # %bb.0: # %entry
; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16
; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64ELR6-NEXT: move $1, $5
+; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64ELR6-NEXT: sync
-; MIPS64ELR6-NEXT: daddiu $2, $zero, -4
-; MIPS64ELR6-NEXT: and $6, $4, $2
+; MIPS64ELR6-NEXT: daddiu $1, $zero, -4
+; MIPS64ELR6-NEXT: and $1, $4, $1
; MIPS64ELR6-NEXT: andi $2, $4, 3
-; MIPS64ELR6-NEXT: sll $10, $2, 3
-; MIPS64ELR6-NEXT: ori $2, $zero, 65535
-; MIPS64ELR6-NEXT: sllv $8, $2, $10
-; MIPS64ELR6-NEXT: nor $9, $zero, $8
-; MIPS64ELR6-NEXT: sllv $7, $1, $10
+; MIPS64ELR6-NEXT: sll $2, $2, 3
+; MIPS64ELR6-NEXT: ori $3, $zero, 65535
+; MIPS64ELR6-NEXT: sllv $3, $3, $2
+; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB5_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $2, 0($6)
-; MIPS64ELR6-NEXT: and $2, $2, $8
-; MIPS64ELR6-NEXT: and $7, $7, $8
-; MIPS64ELR6-NEXT: slt $5, $2, $7
-; MIPS64ELR6-NEXT: selnez $3, $2, $5
-; MIPS64ELR6-NEXT: seleqz $5, $7, $5
-; MIPS64ELR6-NEXT: or $3, $3, $5
-; MIPS64ELR6-NEXT: and $3, $3, $8
-; MIPS64ELR6-NEXT: and $4, $2, $9
-; MIPS64ELR6-NEXT: or $4, $4, $3
-; MIPS64ELR6-NEXT: sc $4, 0($6)
-; MIPS64ELR6-NEXT: beqzc $4, .LBB5_1
+; MIPS64ELR6-NEXT: ll $7, 0($1)
+; MIPS64ELR6-NEXT: and $7, $7, $3
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $10, $7, $5
+; MIPS64ELR6-NEXT: selnez $8, $7, $10
+; MIPS64ELR6-NEXT: seleqz $10, $5, $10
+; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: and $8, $8, $3
+; MIPS64ELR6-NEXT: and $9, $7, $4
+; MIPS64ELR6-NEXT: or $9, $9, $8
+; MIPS64ELR6-NEXT: sc $9, 0($1)
+; MIPS64ELR6-NEXT: beqzc $9, .LBB5_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $1, $2, $8
-; MIPS64ELR6-NEXT: srlv $1, $1, $10
-; MIPS64ELR6-NEXT: seh $1, $1
+; MIPS64ELR6-NEXT: and $6, $7, $3
+; MIPS64ELR6-NEXT: srlv $6, $6, $2
+; MIPS64ELR6-NEXT: seh $6, $6
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
-; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sync
+; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16
; MIPS64ELR6-NEXT: jrc $ra
entry:
@@ -1791,38 +1791,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: addiu $sp, $sp, -8
; MIPS-NEXT: .cfi_def_cfa_offset 8
-; MIPS-NEXT: # kill: def $at killed $a1
+; MIPS-NEXT: move $1, $5
; MIPS-NEXT: sync
-; MIPS-NEXT: addiu $1, $zero, -4
-; MIPS-NEXT: and $6, $4, $1
-; MIPS-NEXT: andi $1, $4, 3
-; MIPS-NEXT: xori $1, $1, 2
-; MIPS-NEXT: sll $10, $1, 3
-; MIPS-NEXT: ori $1, $zero, 65535
-; MIPS-NEXT: sllv $8, $1, $10
-; MIPS-NEXT: nor $9, $zero, $8
-; MIPS-NEXT: sllv $7, $5, $10
+; MIPS-NEXT: addiu $2, $zero, -4
+; MIPS-NEXT: and $2, $4, $2
+; MIPS-NEXT: andi $3, $4, 3
+; MIPS-NEXT: xori $3, $3, 2
+; MIPS-NEXT: sll $3, $3, 3
+; MIPS-NEXT: ori $4, $zero, 65535
+; MIPS-NEXT: sllv $4, $4, $3
+; MIPS-NEXT: nor $6, $zero, $4
+; MIPS-NEXT: sllv $5, $5, $3
; MIPS-NEXT: $BB6_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS-NEXT: ll $2, 0($6)
-; MIPS-NEXT: sltu $5, $2, $7
-; MIPS-NEXT: move $3, $2
-; MIPS-NEXT: movn $3, $7, $5
-; MIPS-NEXT: and $3, $3, $8
-; MIPS-NEXT: and $4, $2, $9
-; MIPS-NEXT: or $4, $4, $3
-; MIPS-NEXT: sc $4, 0($6)
-; MIPS-NEXT: beqz $4, $BB6_1
+; MIPS-NEXT: ll $8, 0($2)
+; MIPS-NEXT: sltu $11, $8, $5
+; MIPS-NEXT: move $9, $8
+; MIPS-NEXT: movn $9, $5, $11
+; MIPS-NEXT: and $9, $9, $4
+; MIPS-NEXT: and $10, $8, $6
+; MIPS-NEXT: or $10, $10, $9
+; MIPS-NEXT: sc $10, 0($2)
+; MIPS-NEXT: beqz $10, $BB6_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
-; MIPS-NEXT: and $1, $2, $8
-; MIPS-NEXT: srlv $1, $1, $10
-; MIPS-NEXT: seh $1, $1
+; MIPS-NEXT: and $7, $8, $4
+; MIPS-NEXT: srlv $7, $7, $3
+; MIPS-NEXT: seh $7, $7
; MIPS-NEXT: # %bb.3: # %entry
-; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS-NEXT: # %bb.4: # %entry
-; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: sync
+; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: addiu $sp, $sp, 8
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
@@ -1831,38 +1831,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: addiu $sp, $sp, -8
; MIPSR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSR6-NEXT: # kill: def $at killed $a1
+; MIPSR6-NEXT: move $1, $5
; MIPSR6-NEXT: sync
-; MIPSR6-NEXT: addiu $1, $zero, -4
-; MIPSR6-NEXT: and $6, $4, $1
-; MIPSR6-NEXT: andi $1, $4, 3
-; MIPSR6-NEXT: xori $1, $1, 2
-; MIPSR6-NEXT: sll $10, $1, 3
-; MIPSR6-NEXT: ori $1, $zero, 65535
-; MIPSR6-NEXT: sllv $8, $1, $10
-; MIPSR6-NEXT: nor $9, $zero, $8
-; MIPSR6-NEXT: sllv $7, $5, $10
+; MIPSR6-NEXT: addiu $2, $zero, -4
+; MIPSR6-NEXT: and $2, $4, $2
+; MIPSR6-NEXT: andi $3, $4, 3
+; MIPSR6-NEXT: xori $3, $3, 2
+; MIPSR6-NEXT: sll $3, $3, 3
+; MIPSR6-NEXT: ori $4, $zero, 65535
+; MIPSR6-NEXT: sllv $4, $4, $3
+; MIPSR6-NEXT: nor $6, $zero, $4
+; MIPSR6-NEXT: sllv $5, $5, $3
; MIPSR6-NEXT: $BB6_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSR6-NEXT: ll $2, 0($6)
-; MIPSR6-NEXT: sltu $5, $2, $7
-; MIPSR6-NEXT: seleqz $3, $2, $5
-; MIPSR6-NEXT: selnez $5, $7, $5
-; MIPSR6-NEXT: or $3, $3, $5
-; MIPSR6-NEXT: and $3, $3, $8
-; MIPSR6-NEXT: and $4, $2, $9
-; MIPSR6-NEXT: or $4, $4, $3
-; MIPSR6-NEXT: sc $4, 0($6)
-; MIPSR6-NEXT: beqzc $4, $BB6_1
+; MIPSR6-NEXT: ll $8, 0($2)
+; MIPSR6-NEXT: sltu $11, $8, $5
+; MIPSR6-NEXT: seleqz $9, $8, $11
+; MIPSR6-NEXT: selnez $11, $5, $11
+; MIPSR6-NEXT: or $9, $9, $11
+; MIPSR6-NEXT: and $9, $9, $4
+; MIPSR6-NEXT: and $10, $8, $6
+; MIPSR6-NEXT: or $10, $10, $9
+; MIPSR6-NEXT: sc $10, 0($2)
+; MIPSR6-NEXT: beqzc $10, $BB6_1
; MIPSR6-NEXT: # %bb.2: # %entry
-; MIPSR6-NEXT: and $1, $2, $8
-; MIPSR6-NEXT: srlv $1, $1, $10
-; MIPSR6-NEXT: seh $1, $1
+; MIPSR6-NEXT: and $7, $8, $4
+; MIPSR6-NEXT: srlv $7, $7, $3
+; MIPSR6-NEXT: seh $7, $7
; MIPSR6-NEXT: # %bb.3: # %entry
-; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSR6-NEXT: # %bb.4: # %entry
-; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: sync
+; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: addiu $sp, $sp, 8
; MIPSR6-NEXT: jrc $ra
;
@@ -1870,37 +1870,37 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MM: # %bb.0: # %entry
; MM-NEXT: addiu $sp, $sp, -8
; MM-NEXT: .cfi_def_cfa_offset 8
-; MM-NEXT: # kill: def $at killed $a1
+; MM-NEXT: move $1, $5
; MM-NEXT: sync
-; MM-NEXT: addiu $1, $zero, -4
-; MM-NEXT: and $6, $4, $1
-; MM-NEXT: andi $1, $4, 3
-; MM-NEXT: xori $1, $1, 2
-; MM-NEXT: sll $10, $1, 3
-; MM-NEXT: ori $1, $zero, 65535
-; MM-NEXT: sllv $8, $1, $10
-; MM-NEXT: nor $9, $zero, $8
-; MM-NEXT: sllv $7, $5, $10
+; MM-NEXT: addiu $2, $zero, -4
+; MM-NEXT: and $2, $4, $2
+; MM-NEXT: andi $3, $4, 3
+; MM-NEXT: xori $3, $3, 2
+; MM-NEXT: sll $3, $3, 3
+; MM-NEXT: ori $4, $zero, 65535
+; MM-NEXT: sllv $4, $4, $3
+; MM-NEXT: nor $6, $zero, $4
+; MM-NEXT: sllv $5, $5, $3
; MM-NEXT: $BB6_1: # %entry
; MM-NEXT: # =>This Inner Loop Header: Depth=1
-; MM-NEXT: ll $2, 0($6)
-; MM-NEXT: sltu $5, $2, $7
-; MM-NEXT: or $3, $2, $zero
-; MM-NEXT: movn $3, $7, $5
-; MM-NEXT: and $3, $3, $8
-; MM-NEXT: and $4, $2, $9
-; MM-NEXT: or $4, $4, $3
-; MM-NEXT: sc $4, 0($6)
-; MM-NEXT: beqzc $4, $BB6_1
+; MM-NEXT: ll $8, 0($2)
+; MM-NEXT: sltu $11, $8, $5
+; MM-NEXT: or $9, $8, $zero
+; MM-NEXT: movn $9, $5, $11
+; MM-NEXT: and $9, $9, $4
+; MM-NEXT: and $10, $8, $6
+; MM-NEXT: or $10, $10, $9
+; MM-NEXT: sc $10, 0($2)
+; MM-NEXT: beqzc $10, $BB6_1
; MM-NEXT: # %bb.2: # %entry
-; MM-NEXT: and $1, $2, $8
-; MM-NEXT: srlv $1, $1, $10
-; MM-NEXT: seh $1, $1
+; MM-NEXT: and $7, $8, $4
+; MM-NEXT: srlv $7, $7, $3
+; MM-NEXT: seh $7, $7
; MM-NEXT: # %bb.3: # %entry
-; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MM-NEXT: # %bb.4: # %entry
-; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: sync
+; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: addiusp 8
; MM-NEXT: jrc $ra
;
@@ -1908,38 +1908,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: addiu $sp, $sp, -8
; MMR6-NEXT: .cfi_def_cfa_offset 8
-; MMR6-NEXT: # kill: def $at killed $a1
+; MMR6-NEXT: move $1, $5
; MMR6-NEXT: sync
-; MMR6-NEXT: addiu $1, $zero, -4
-; MMR6-NEXT: and $6, $4, $1
-; MMR6-NEXT: andi $1, $4, 3
-; MMR6-NEXT: xori $1, $1, 2
-; MMR6-NEXT: sll $10, $1, 3
-; MMR6-NEXT: ori $1, $zero, 65535
-; MMR6-NEXT: sllv $8, $1, $10
-; MMR6-NEXT: nor $9, $zero, $8
-; MMR6-NEXT: sllv $7, $5, $10
+; MMR6-NEXT: addiu $2, $zero, -4
+; MMR6-NEXT: and $2, $4, $2
+; MMR6-NEXT: andi $3, $4, 3
+; MMR6-NEXT: xori $3, $3, 2
+; MMR6-NEXT: sll $3, $3, 3
+; MMR6-NEXT: ori $4, $zero, 65535
+; MMR6-NEXT: sllv $4, $4, $3
+; MMR6-NEXT: nor $6, $zero, $4
+; MMR6-NEXT: sllv $5, $5, $3
; MMR6-NEXT: $BB6_1: # %entry
; MMR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMR6-NEXT: ll $2, 0($6)
-; MMR6-NEXT: sltu $5, $2, $7
-; MMR6-NEXT: seleqz $3, $2, $5
-; MMR6-NEXT: selnez $5, $7, $5
-; MMR6-NEXT: or $3, $3, $5
-; MMR6-NEXT: and $3, $3, $8
-; MMR6-NEXT: and $4, $2, $9
-; MMR6-NEXT: or $4, $4, $3
-; MMR6-NEXT: sc $4, 0($6)
-; MMR6-NEXT: beqc $4, $zero, $BB6_1
+; MMR6-NEXT: ll $8, 0($2)
+; MMR6-NEXT: sltu $11, $8, $5
+; MMR6-NEXT: seleqz $9, $8, $11
+; MMR6-NEXT: selnez $11, $5, $11
+; MMR6-NEXT: or $9, $9, $11
+; MMR6-NEXT: and $9, $9, $4
+; MMR6-NEXT: and $10, $8, $6
+; MMR6-NEXT: or $10, $10, $9
+; MMR6-NEXT: sc $10, 0($2)
+; MMR6-NEXT: beqc $10, $zero, $BB6_1
; MMR6-NEXT: # %bb.2: # %entry
-; MMR6-NEXT: and $1, $2, $8
-; MMR6-NEXT: srlv $1, $1, $10
-; MMR6-NEXT: seh $1, $1
+; MMR6-NEXT: and $7, $8, $4
+; MMR6-NEXT: srlv $7, $7, $3
+; MMR6-NEXT: seh $7, $7
; MMR6-NEXT: # %bb.3: # %entry
-; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMR6-NEXT: # %bb.4: # %entry
-; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: sync
+; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: addiu $sp, $sp, 8
; MMR6-NEXT: jrc $ra
;
@@ -1947,39 +1947,39 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSEL: # %bb.0: # %entry
; MIPSEL-NEXT: addiu $sp, $sp, -8
; MIPSEL-NEXT: .cfi_def_cfa_offset 8
-; MIPSEL-NEXT: # kill: def $at killed $a1
+; MIPSEL-NEXT: move $1, $5
; MIPSEL-NEXT: sync
-; MIPSEL-NEXT: addiu $1, $zero, -4
-; MIPSEL-NEXT: and $6, $4, $1
-; MIPSEL-NEXT: andi $1, $4, 3
-; MIPSEL-NEXT: sll $10, $1, 3
-; MIPSEL-NEXT: ori $1, $zero, 65535
-; MIPSEL-NEXT: sllv $8, $1, $10
-; MIPSEL-NEXT: nor $9, $zero, $8
-; MIPSEL-NEXT: sllv $7, $5, $10
+; MIPSEL-NEXT: addiu $2, $zero, -4
+; MIPSEL-NEXT: and $2, $4, $2
+; MIPSEL-NEXT: andi $3, $4, 3
+; MIPSEL-NEXT: sll $3, $3, 3
+; MIPSEL-NEXT: ori $4, $zero, 65535
+; MIPSEL-NEXT: sllv $4, $4, $3
+; MIPSEL-NEXT: nor $6, $zero, $4
+; MIPSEL-NEXT: sllv $5, $5, $3
; MIPSEL-NEXT: $BB6_1: # %entry
; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSEL-NEXT: ll $2, 0($6)
-; MIPSEL-NEXT: and $2, $2, $8
-; MIPSEL-NEXT: and $7, $7, $8
-; MIPSEL-NEXT: sltu $5, $2, $7
-; MIPSEL-NEXT: move $3, $2
-; MIPSEL-NEXT: movn $3, $7, $5
-; MIPSEL-NEXT: and $3, $3, $8
-; MIPSEL-NEXT: and $4, $2, $9
-; MIPSEL-NEXT: or $4, $4, $3
-; MIPSEL-NEXT: sc $4, 0($6)
-; MIPSEL-NEXT: beqz $4, $BB6_1
+; MIPSEL-NEXT: ll $8, 0($2)
+; MIPSEL-NEXT: and $8, $8, $4
+; MIPSEL-NEXT: and $5, $5, $4
+; MIPSEL-NEXT: sltu $11, $8, $5
+; MIPSEL-NEXT: move $9, $8
+; MIPSEL-NEXT: movn $9, $5, $11
+; MIPSEL-NEXT: and $9, $9, $4
+; MIPSEL-NEXT: and $10, $8, $6
+; MIPSEL-NEXT: or $10, $10, $9
+; MIPSEL-NEXT: sc $10, 0($2)
+; MIPSEL-NEXT: beqz $10, $BB6_1
; MIPSEL-NEXT: nop
; MIPSEL-NEXT: # %bb.2: # %entry
-; MIPSEL-NEXT: and $1, $2, $8
-; MIPSEL-NEXT: srlv $1, $1, $10
-; MIPSEL-NEXT: seh $1, $1
+; MIPSEL-NEXT: and $7, $8, $4
+; MIPSEL-NEXT: srlv $7, $7, $3
+; MIPSEL-NEXT: seh $7, $7
; MIPSEL-NEXT: # %bb.3: # %entry
-; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSEL-NEXT: # %bb.4: # %entry
-; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: sync
+; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: addiu $sp, $sp, 8
; MIPSEL-NEXT: jr $ra
; MIPSEL-NEXT: nop
@@ -1988,39 +1988,39 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSELR6: # %bb.0: # %entry
; MIPSELR6-NEXT: addiu $sp, $sp, -8
; MIPSELR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSELR6-NEXT: # kill: def $at killed $a1
+; MIPSELR6-NEXT: move $1, $5
; MIPSELR6-NEXT: sync
-; MIPSELR6-NEXT: addiu $1, $zero, -4
-; MIPSELR6-NEXT: and $6, $4, $1
-; MIPSELR6-NEXT: andi $1, $4, 3
-; MIPSELR6-NEXT: sll $10, $1, 3
-; MIPSELR6-NEXT: ori $1, $zero, 65535
-; MIPSELR6-NEXT: sllv $8, $1, $10
-; MIPSELR6-NEXT: nor $9, $zero, $8
-; MIPSELR6-NEXT: sllv $7, $5, $10
+; MIPSELR6-NEXT: addiu $2, $zero, -4
+; MIPSELR6-NEXT: and $2, $4, $2
+; MIPSELR6-NEXT: andi $3, $4, 3
+; MIPSELR6-NEXT: sll $3, $3, 3
+; MIPSELR6-NEXT: ori $4, $zero, 65535
+; MIPSELR6-NEXT: sllv $4, $4, $3
+; MIPSELR6-NEXT: nor $6, $zero, $4
+; MIPSELR6-NEXT: sllv $5, $5, $3
; MIPSELR6-NEXT: $BB6_1: # %entry
; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSELR6-NEXT: ll $2, 0($6)
-; MIPSELR6-NEXT: and $2, $2, $8
-; MIPSELR6-NEXT: and $7, $7, $8
-; MIPSELR6-NEXT: sltu $5, $2, $7
-; MIPSELR6-NEXT: seleqz $3, $2, $5
-; MIPSELR6-NEXT: selnez $5, $7, $5
-; MIPSELR6-NEXT: or $3, $3, $5
-; MIPSELR6-NEXT: and $3, $3, $8
-; MIPSELR6-NEXT: and $4, $2, $9
-; MIPSELR6-NEXT: or $4, $4, $3
-; MIPSELR6-NEXT: sc $4, 0($6)
-; MIPSELR6-NEXT: beqzc $4, $BB6_1
+; MIPSELR6-NEXT: ll $8, 0($2)
+; MIPSELR6-NEXT: and $8, $8, $4
+; MIPSELR6-NEXT: and $5, $5, $4
+; MIPSELR6-NEXT: sltu $11, $8, $5
+; MIPSELR6-NEXT: seleqz $9, $8, $11
+; MIPSELR6-NEXT: selnez $11, $5, $11
+; MIPSELR6-NEXT: or $9, $9, $11
+; MIPSELR6-NEXT: and $9, $9, $4
+; MIPSELR6-NEXT: and $10, $8, $6
+; MIPSELR6-NEXT: or $10, $10, $9
+; MIPSELR6-NEXT: sc $10, 0($2)
+; MIPSELR6-NEXT: beqzc $10, $BB6_1
; MIPSELR6-NEXT: # %bb.2: # %entry
-; MIPSELR6-NEXT: and $1, $2, $8
-; MIPSELR6-NEXT: srlv $1, $1, $10
-; MIPSELR6-NEXT: seh $1, $1
+; MIPSELR6-NEXT: and $7, $8, $4
+; MIPSELR6-NEXT: srlv $7, $7, $3
+; MIPSELR6-NEXT: seh $7, $7
; MIPSELR6-NEXT: # %bb.3: # %entry
-; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSELR6-NEXT: # %bb.4: # %entry
-; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: sync
+; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: addiu $sp, $sp, 8
; MIPSELR6-NEXT: jrc $ra
;
@@ -2028,38 +2028,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MMEL: # %bb.0: # %entry
; MMEL-NEXT: addiu $sp, $sp, -8
; MMEL-NEXT: .cfi_def_cfa_offset 8
-; MMEL-NEXT: # kill: def $at killed $a1
+; MMEL-NEXT: move $1, $5
; MMEL-NEXT: sync
-; MMEL-NEXT: addiu $1, $zero, -4
-; MMEL-NEXT: and $6, $4, $1
-; MMEL-NEXT: andi $1, $4, 3
-; MMEL-NEXT: sll $10, $1, 3
-; MMEL-NEXT: ori $1, $zero, 65535
-; MMEL-NEXT: sllv $8, $1, $10
-; MMEL-NEXT: nor $9, $zero, $8
-; MMEL-NEXT: sllv $7, $5, $10
+; MMEL-NEXT: addiu $2, $zero, -4
+; MMEL-NEXT: and $2, $4, $2
+; MMEL-NEXT: andi $3, $4, 3
+; MMEL-NEXT: sll $3, $3, 3
+; MMEL-NEXT: ori $4, $zero, 65535
+; MMEL-NEXT: sllv $4, $4, $3
+; MMEL-NEXT: nor $6, $zero, $4
+; MMEL-NEXT: sllv $5, $5, $3
; MMEL-NEXT: $BB6_1: # %entry
; MMEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MMEL-NEXT: ll $2, 0($6)
-; MMEL-NEXT: and $2, $2, $8
-; MMEL-NEXT: and $7, $7, $8
-; MMEL-NEXT: sltu $5, $2, $7
-; MMEL-NEXT: or $3, $2, $zero
-; MMEL-NEXT: movn $3, $7, $5
-; MMEL-NEXT: and $3, $3, $8
-; MMEL-NEXT: and $4, $2, $9
-; MMEL-NEXT: or $4, $4, $3
-; MMEL-NEXT: sc $4, 0($6)
-; MMEL-NEXT: beqzc $4, $BB6_1
+; MMEL-NEXT: ll $8, 0($2)
+; MMEL-NEXT: and $8, $8, $4
+; MMEL-NEXT: and $5, $5, $4
+; MMEL-NEXT: sltu $11, $8, $5
+; MMEL-NEXT: or $9, $8, $zero
+; MMEL-NEXT: movn $9, $5, $11
+; MMEL-NEXT: and $9, $9, $4
+; MMEL-NEXT: and $10, $8, $6
+; MMEL-NEXT: or $10, $10, $9
+; MMEL-NEXT: sc $10, 0($2)
+; MMEL-NEXT: beqzc $10, $BB6_1
; MMEL-NEXT: # %bb.2: # %entry
-; MMEL-NEXT: and $1, $2, $8
-; MMEL-NEXT: srlv $1, $1, $10
-; MMEL-NEXT: seh $1, $1
+; MMEL-NEXT: and $7, $8, $4
+; MMEL-NEXT: srlv $7, $7, $3
+; MMEL-NEXT: seh $7, $7
; MMEL-NEXT: # %bb.3: # %entry
-; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMEL-NEXT: # %bb.4: # %entry
-; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: sync
+; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: addiusp 8
; MMEL-NEXT: jrc $ra
;
@@ -2067,39 +2067,39 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MMELR6: # %bb.0: # %entry
; MMELR6-NEXT: addiu $sp, $sp, -8
; MMELR6-NEXT: .cfi_def_cfa_offset 8
-; MMELR6-NEXT: # kill: def $at killed $a1
+; MMELR6-NEXT: move $1, $5
; MMELR6-NEXT: sync
-; MMELR6-NEXT: addiu $1, $zero, -4
-; MMELR6-NEXT: and $6, $4, $1
-; MMELR6-NEXT: andi $1, $4, 3
-; MMELR6-NEXT: sll $10, $1, 3
-; MMELR6-NEXT: ori $1, $zero, 65535
-; MMELR6-NEXT: sllv $8, $1, $10
-; MMELR6-NEXT: nor $9, $zero, $8
-; MMELR6-NEXT: sllv $7, $5, $10
+; MMELR6-NEXT: addiu $2, $zero, -4
+; MMELR6-NEXT: and $2, $4, $2
+; MMELR6-NEXT: andi $3, $4, 3
+; MMELR6-NEXT: sll $3, $3, 3
+; MMELR6-NEXT: ori $4, $zero, 65535
+; MMELR6-NEXT: sllv $4, $4, $3
+; MMELR6-NEXT: nor $6, $zero, $4
+; MMELR6-NEXT: sllv $5, $5, $3
; MMELR6-NEXT: $BB6_1: # %entry
; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMELR6-NEXT: ll $2, 0($6)
-; MMELR6-NEXT: and $2, $2, $8
-; MMELR6-NEXT: and $7, $7, $8
-; MMELR6-NEXT: sltu $5, $2, $7
-; MMELR6-NEXT: seleqz $3, $2, $5
-; MMELR6-NEXT: selnez $5, $7, $5
-; MMELR6-NEXT: or $3, $3, $5
-; MMELR6-NEXT: and $3, $3, $8
-; MMELR6-NEXT: and $4, $2, $9
-; MMELR6-NEXT: or $4, $4, $3
-; MMELR6-NEXT: sc $4, 0($6)
-; MMELR6-NEXT: beqc $4, $zero, $BB6_1
+; MMELR6-NEXT: ll $8, 0($2)
+; MMELR6-NEXT: and $8, $8, $4
+; MMELR6-NEXT: and $5, $5, $4
+; MMELR6-NEXT: sltu $11, $8, $5
+; MMELR6-NEXT: seleqz $9, $8, $11
+; MMELR6-NEXT: selnez $11, $5, $11
+; MMELR6-NEXT: or $9, $9, $11
+; MMELR6-NEXT: and $9, $9, $4
+; MMELR6-NEXT: and $10, $8, $6
+; MMELR6-NEXT: or $10, $10, $9
+; MMELR6-NEXT: sc $10, 0($2)
+; MMELR6-NEXT: beqc $10, $zero, $BB6_1
; MMELR6-NEXT: # %bb.2: # %entry
-; MMELR6-NEXT: and $1, $2, $8
-; MMELR6-NEXT: srlv $1, $1, $10
-; MMELR6-NEXT: seh $1, $1
+; MMELR6-NEXT: and $7, $8, $4
+; MMELR6-NEXT: srlv $7, $7, $3
+; MMELR6-NEXT: seh $7, $7
; MMELR6-NEXT: # %bb.3: # %entry
-; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMELR6-NEXT: # %bb.4: # %entry
-; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: sync
+; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: addiu $sp, $sp, 8
; MMELR6-NEXT: jrc $ra
;
@@ -2107,38 +2107,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $sp, $sp, -16
; MIPS64-NEXT: .cfi_def_cfa_offset 16
-; MIPS64-NEXT: move $1, $5
+; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64-NEXT: sync
-; MIPS64-NEXT: daddiu $2, $zero, -4
-; MIPS64-NEXT: and $6, $4, $2
+; MIPS64-NEXT: daddiu $1, $zero, -4
+; MIPS64-NEXT: and $1, $4, $1
; MIPS64-NEXT: andi $2, $4, 3
; MIPS64-NEXT: xori $2, $2, 2
-; MIPS64-NEXT: sll $10, $2, 3
-; MIPS64-NEXT: ori $2, $zero, 65535
-; MIPS64-NEXT: sllv $8, $2, $10
-; MIPS64-NEXT: nor $9, $zero, $8
-; MIPS64-NEXT: sllv $7, $1, $10
+; MIPS64-NEXT: sll $2, $2, 3
+; MIPS64-NEXT: ori $3, $zero, 65535
+; MIPS64-NEXT: sllv $3, $3, $2
+; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB6_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $2, 0($6)
-; MIPS64-NEXT: sltu $5, $2, $7
-; MIPS64-NEXT: move $3, $2
-; MIPS64-NEXT: movn $3, $7, $5
-; MIPS64-NEXT: and $3, $3, $8
-; MIPS64-NEXT: and $4, $2, $9
-; MIPS64-NEXT: or $4, $4, $3
-; MIPS64-NEXT: sc $4, 0($6)
-; MIPS64-NEXT: beqz $4, .LBB6_1
+; MIPS64-NEXT: ll $7, 0($1)
+; MIPS64-NEXT: sltu $10, $7, $5
+; MIPS64-NEXT: move $8, $7
+; MIPS64-NEXT: movn $8, $5, $10
+; MIPS64-NEXT: and $8, $8, $3
+; MIPS64-NEXT: and $9, $7, $4
+; MIPS64-NEXT: or $9, $9, $8
+; MIPS64-NEXT: sc $9, 0($1)
+; MIPS64-NEXT: beqz $9, .LBB6_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $1, $2, $8
-; MIPS64-NEXT: srlv $1, $1, $10
-; MIPS64-NEXT: seh $1, $1
+; MIPS64-NEXT: and $6, $7, $3
+; MIPS64-NEXT: srlv $6, $6, $2
+; MIPS64-NEXT: seh $6, $6
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
-; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sync
+; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: daddiu $sp, $sp, 16
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
@@ -2147,38 +2147,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R6-NEXT: move $1, $5
+; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6-NEXT: sync
-; MIPS64R6-NEXT: daddiu $2, $zero, -4
-; MIPS64R6-NEXT: and $6, $4, $2
+; MIPS64R6-NEXT: daddiu $1, $zero, -4
+; MIPS64R6-NEXT: and $1, $4, $1
; MIPS64R6-NEXT: andi $2, $4, 3
; MIPS64R6-NEXT: xori $2, $2, 2
-; MIPS64R6-NEXT: sll $10, $2, 3
-; MIPS64R6-NEXT: ori $2, $zero, 65535
-; MIPS64R6-NEXT: sllv $8, $2, $10
-; MIPS64R6-NEXT: nor $9, $zero, $8
-; MIPS64R6-NEXT: sllv $7, $1, $10
+; MIPS64R6-NEXT: sll $2, $2, 3
+; MIPS64R6-NEXT: ori $3, $zero, 65535
+; MIPS64R6-NEXT: sllv $3, $3, $2
+; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB6_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $2, 0($6)
-; MIPS64R6-NEXT: sltu $5, $2, $7
-; MIPS64R6-NEXT: seleqz $3, $2, $5
-; MIPS64R6-NEXT: selnez $5, $7, $5
-; MIPS64R6-NEXT: or $3, $3, $5
-; MIPS64R6-NEXT: and $3, $3, $8
-; MIPS64R6-NEXT: and $4, $2, $9
-; MIPS64R6-NEXT: or $4, $4, $3
-; MIPS64R6-NEXT: sc $4, 0($6)
-; MIPS64R6-NEXT: beqzc $4, .LBB6_1
+; MIPS64R6-NEXT: ll $7, 0($1)
+; MIPS64R6-NEXT: sltu $10, $7, $5
+; MIPS64R6-NEXT: seleqz $8, $7, $10
+; MIPS64R6-NEXT: selnez $10, $5, $10
+; MIPS64R6-NEXT: or $8, $8, $10
+; MIPS64R6-NEXT: and $8, $8, $3
+; MIPS64R6-NEXT: and $9, $7, $4
+; MIPS64R6-NEXT: or $9, $9, $8
+; MIPS64R6-NEXT: sc $9, 0($1)
+; MIPS64R6-NEXT: beqzc $9, .LBB6_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $1, $2, $8
-; MIPS64R6-NEXT: srlv $1, $1, $10
-; MIPS64R6-NEXT: seh $1, $1
+; MIPS64R6-NEXT: and $6, $7, $3
+; MIPS64R6-NEXT: srlv $6, $6, $2
+; MIPS64R6-NEXT: seh $6, $6
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
-; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sync
+; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: daddiu $sp, $sp, 16
; MIPS64R6-NEXT: jrc $ra
;
@@ -2186,39 +2186,39 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64EL: # %bb.0: # %entry
; MIPS64EL-NEXT: daddiu $sp, $sp, -16
; MIPS64EL-NEXT: .cfi_def_cfa_offset 16
-; MIPS64EL-NEXT: move $1, $5
+; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64EL-NEXT: sync
-; MIPS64EL-NEXT: daddiu $2, $zero, -4
-; MIPS64EL-NEXT: and $6, $4, $2
+; MIPS64EL-NEXT: daddiu $1, $zero, -4
+; MIPS64EL-NEXT: and $1, $4, $1
; MIPS64EL-NEXT: andi $2, $4, 3
-; MIPS64EL-NEXT: sll $10, $2, 3
-; MIPS64EL-NEXT: ori $2, $zero, 65535
-; MIPS64EL-NEXT: sllv $8, $2, $10
-; MIPS64EL-NEXT: nor $9, $zero, $8
-; MIPS64EL-NEXT: sllv $7, $1, $10
+; MIPS64EL-NEXT: sll $2, $2, 3
+; MIPS64EL-NEXT: ori $3, $zero, 65535
+; MIPS64EL-NEXT: sllv $3, $3, $2
+; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB6_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $2, 0($6)
-; MIPS64EL-NEXT: and $2, $2, $8
-; MIPS64EL-NEXT: and $7, $7, $8
-; MIPS64EL-NEXT: sltu $5, $2, $7
-; MIPS64EL-NEXT: move $3, $2
-; MIPS64EL-NEXT: movn $3, $7, $5
-; MIPS64EL-NEXT: and $3, $3, $8
-; MIPS64EL-NEXT: and $4, $2, $9
-; MIPS64EL-NEXT: or $4, $4, $3
-; MIPS64EL-NEXT: sc $4, 0($6)
-; MIPS64EL-NEXT: beqz $4, .LBB6_1
+; MIPS64EL-NEXT: ll $7, 0($1)
+; MIPS64EL-NEXT: and $7, $7, $3
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $10, $7, $5
+; MIPS64EL-NEXT: move $8, $7
+; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: and $8, $8, $3
+; MIPS64EL-NEXT: and $9, $7, $4
+; MIPS64EL-NEXT: or $9, $9, $8
+; MIPS64EL-NEXT: sc $9, 0($1)
+; MIPS64EL-NEXT: beqz $9, .LBB6_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $1, $2, $8
-; MIPS64EL-NEXT: srlv $1, $1, $10
-; MIPS64EL-NEXT: seh $1, $1
+; MIPS64EL-NEXT: and $6, $7, $3
+; MIPS64EL-NEXT: srlv $6, $6, $2
+; MIPS64EL-NEXT: seh $6, $6
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
-; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sync
+; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
@@ -2227,39 +2227,39 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64ELR6: # %bb.0: # %entry
; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16
; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64ELR6-NEXT: move $1, $5
+; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64ELR6-NEXT: sync
-; MIPS64ELR6-NEXT: daddiu $2, $zero, -4
-; MIPS64ELR6-NEXT: and $6, $4, $2
+; MIPS64ELR6-NEXT: daddiu $1, $zero, -4
+; MIPS64ELR6-NEXT: and $1, $4, $1
; MIPS64ELR6-NEXT: andi $2, $4, 3
-; MIPS64ELR6-NEXT: sll $10, $2, 3
-; MIPS64ELR6-NEXT: ori $2, $zero, 65535
-; MIPS64ELR6-NEXT: sllv $8, $2, $10
-; MIPS64ELR6-NEXT: nor $9, $zero, $8
-; MIPS64ELR6-NEXT: sllv $7, $1, $10
+; MIPS64ELR6-NEXT: sll $2, $2, 3
+; MIPS64ELR6-NEXT: ori $3, $zero, 65535
+; MIPS64ELR6-NEXT: sllv $3, $3, $2
+; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB6_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $2, 0($6)
-; MIPS64ELR6-NEXT: and $2, $2, $8
-; MIPS64ELR6-NEXT: and $7, $7, $8
-; MIPS64ELR6-NEXT: sltu $5, $2, $7
-; MIPS64ELR6-NEXT: seleqz $3, $2, $5
-; MIPS64ELR6-NEXT: selnez $5, $7, $5
-; MIPS64ELR6-NEXT: or $3, $3, $5
-; MIPS64ELR6-NEXT: and $3, $3, $8
-; MIPS64ELR6-NEXT: and $4, $2, $9
-; MIPS64ELR6-NEXT: or $4, $4, $3
-; MIPS64ELR6-NEXT: sc $4, 0($6)
-; MIPS64ELR6-NEXT: beqzc $4, .LBB6_1
+; MIPS64ELR6-NEXT: ll $7, 0($1)
+; MIPS64ELR6-NEXT: and $7, $7, $3
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $10, $7, $5
+; MIPS64ELR6-NEXT: seleqz $8, $7, $10
+; MIPS64ELR6-NEXT: selnez $10, $5, $10
+; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: and $8, $8, $3
+; MIPS64ELR6-NEXT: and $9, $7, $4
+; MIPS64ELR6-NEXT: or $9, $9, $8
+; MIPS64ELR6-NEXT: sc $9, 0($1)
+; MIPS64ELR6-NEXT: beqzc $9, .LBB6_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $1, $2, $8
-; MIPS64ELR6-NEXT: srlv $1, $1, $10
-; MIPS64ELR6-NEXT: seh $1, $1
+; MIPS64ELR6-NEXT: and $6, $7, $3
+; MIPS64ELR6-NEXT: srlv $6, $6, $2
+; MIPS64ELR6-NEXT: seh $6, $6
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
-; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sync
+; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16
; MIPS64ELR6-NEXT: jrc $ra
entry:
@@ -2272,38 +2272,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: addiu $sp, $sp, -8
; MIPS-NEXT: .cfi_def_cfa_offset 8
-; MIPS-NEXT: # kill: def $at killed $a1
+; MIPS-NEXT: move $1, $5
; MIPS-NEXT: sync
-; MIPS-NEXT: addiu $1, $zero, -4
-; MIPS-NEXT: and $6, $4, $1
-; MIPS-NEXT: andi $1, $4, 3
-; MIPS-NEXT: xori $1, $1, 2
-; MIPS-NEXT: sll $10, $1, 3
-; MIPS-NEXT: ori $1, $zero, 65535
-; MIPS-NEXT: sllv $8, $1, $10
-; MIPS-NEXT: nor $9, $zero, $8
-; MIPS-NEXT: sllv $7, $5, $10
+; MIPS-NEXT: addiu $2, $zero, -4
+; MIPS-NEXT: and $2, $4, $2
+; MIPS-NEXT: andi $3, $4, 3
+; MIPS-NEXT: xori $3, $3, 2
+; MIPS-NEXT: sll $3, $3, 3
+; MIPS-NEXT: ori $4, $zero, 65535
+; MIPS-NEXT: sllv $4, $4, $3
+; MIPS-NEXT: nor $6, $zero, $4
+; MIPS-NEXT: sllv $5, $5, $3
; MIPS-NEXT: $BB7_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS-NEXT: ll $2, 0($6)
-; MIPS-NEXT: sltu $5, $2, $7
-; MIPS-NEXT: move $3, $2
-; MIPS-NEXT: movz $3, $7, $5
-; MIPS-NEXT: and $3, $3, $8
-; MIPS-NEXT: and $4, $2, $9
-; MIPS-NEXT: or $4, $4, $3
-; MIPS-NEXT: sc $4, 0($6)
-; MIPS-NEXT: beqz $4, $BB7_1
+; MIPS-NEXT: ll $8, 0($2)
+; MIPS-NEXT: sltu $11, $8, $5
+; MIPS-NEXT: move $9, $8
+; MIPS-NEXT: movz $9, $5, $11
+; MIPS-NEXT: and $9, $9, $4
+; MIPS-NEXT: and $10, $8, $6
+; MIPS-NEXT: or $10, $10, $9
+; MIPS-NEXT: sc $10, 0($2)
+; MIPS-NEXT: beqz $10, $BB7_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
-; MIPS-NEXT: and $1, $2, $8
-; MIPS-NEXT: srlv $1, $1, $10
-; MIPS-NEXT: seh $1, $1
+; MIPS-NEXT: and $7, $8, $4
+; MIPS-NEXT: srlv $7, $7, $3
+; MIPS-NEXT: seh $7, $7
; MIPS-NEXT: # %bb.3: # %entry
-; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS-NEXT: # %bb.4: # %entry
-; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: sync
+; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: addiu $sp, $sp, 8
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
@@ -2312,38 +2312,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: addiu $sp, $sp, -8
; MIPSR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSR6-NEXT: # kill: def $at killed $a1
+; MIPSR6-NEXT: move $1, $5
; MIPSR6-NEXT: sync
-; MIPSR6-NEXT: addiu $1, $zero, -4
-; MIPSR6-NEXT: and $6, $4, $1
-; MIPSR6-NEXT: andi $1, $4, 3
-; MIPSR6-NEXT: xori $1, $1, 2
-; MIPSR6-NEXT: sll $10, $1, 3
-; MIPSR6-NEXT: ori $1, $zero, 65535
-; MIPSR6-NEXT: sllv $8, $1, $10
-; MIPSR6-NEXT: nor $9, $zero, $8
-; MIPSR6-NEXT: sllv $7, $5, $10
+; MIPSR6-NEXT: addiu $2, $zero, -4
+; MIPSR6-NEXT: and $2, $4, $2
+; MIPSR6-NEXT: andi $3, $4, 3
+; MIPSR6-NEXT: xori $3, $3, 2
+; MIPSR6-NEXT: sll $3, $3, 3
+; MIPSR6-NEXT: ori $4, $zero, 65535
+; MIPSR6-NEXT: sllv $4, $4, $3
+; MIPSR6-NEXT: nor $6, $zero, $4
+; MIPSR6-NEXT: sllv $5, $5, $3
; MIPSR6-NEXT: $BB7_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSR6-NEXT: ll $2, 0($6)
-; MIPSR6-NEXT: sltu $5, $2, $7
-; MIPSR6-NEXT: selnez $3, $2, $5
-; MIPSR6-NEXT: seleqz $5, $7, $5
-; MIPSR6-NEXT: or $3, $3, $5
-; MIPSR6-NEXT: and $3, $3, $8
-; MIPSR6-NEXT: and $4, $2, $9
-; MIPSR6-NEXT: or $4, $4, $3
-; MIPSR6-NEXT: sc $4, 0($6)
-; MIPSR6-NEXT: beqzc $4, $BB7_1
+; MIPSR6-NEXT: ll $8, 0($2)
+; MIPSR6-NEXT: sltu $11, $8, $5
+; MIPSR6-NEXT: selnez $9, $8, $11
+; MIPSR6-NEXT: seleqz $11, $5, $11
+; MIPSR6-NEXT: or $9, $9, $11
+; MIPSR6-NEXT: and $9, $9, $4
+; MIPSR6-NEXT: and $10, $8, $6
+; MIPSR6-NEXT: or $10, $10, $9
+; MIPSR6-NEXT: sc $10, 0($2)
+; MIPSR6-NEXT: beqzc $10, $BB7_1
; MIPSR6-NEXT: # %bb.2: # %entry
-; MIPSR6-NEXT: and $1, $2, $8
-; MIPSR6-NEXT: srlv $1, $1, $10
-; MIPSR6-NEXT: seh $1, $1
+; MIPSR6-NEXT: and $7, $8, $4
+; MIPSR6-NEXT: srlv $7, $7, $3
+; MIPSR6-NEXT: seh $7, $7
; MIPSR6-NEXT: # %bb.3: # %entry
-; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSR6-NEXT: # %bb.4: # %entry
-; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: sync
+; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: addiu $sp, $sp, 8
; MIPSR6-NEXT: jrc $ra
;
@@ -2351,37 +2351,37 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MM: # %bb.0: # %entry
; MM-NEXT: addiu $sp, $sp, -8
; MM-NEXT: .cfi_def_cfa_offset 8
-; MM-NEXT: # kill: def $at killed $a1
+; MM-NEXT: move $1, $5
; MM-NEXT: sync
-; MM-NEXT: addiu $1, $zero, -4
-; MM-NEXT: and $6, $4, $1
-; MM-NEXT: andi $1, $4, 3
-; MM-NEXT: xori $1, $1, 2
-; MM-NEXT: sll $10, $1, 3
-; MM-NEXT: ori $1, $zero, 65535
-; MM-NEXT: sllv $8, $1, $10
-; MM-NEXT: nor $9, $zero, $8
-; MM-NEXT: sllv $7, $5, $10
+; MM-NEXT: addiu $2, $zero, -4
+; MM-NEXT: and $2, $4, $2
+; MM-NEXT: andi $3, $4, 3
+; MM-NEXT: xori $3, $3, 2
+; MM-NEXT: sll $3, $3, 3
+; MM-NEXT: ori $4, $zero, 65535
+; MM-NEXT: sllv $4, $4, $3
+; MM-NEXT: nor $6, $zero, $4
+; MM-NEXT: sllv $5, $5, $3
; MM-NEXT: $BB7_1: # %entry
; MM-NEXT: # =>This Inner Loop Header: Depth=1
-; MM-NEXT: ll $2, 0($6)
-; MM-NEXT: sltu $5, $2, $7
-; MM-NEXT: or $3, $2, $zero
-; MM-NEXT: movz $3, $7, $5
-; MM-NEXT: and $3, $3, $8
-; MM-NEXT: and $4, $2, $9
-; MM-NEXT: or $4, $4, $3
-; MM-NEXT: sc $4, 0($6)
-; MM-NEXT: beqzc $4, $BB7_1
+; MM-NEXT: ll $8, 0($2)
+; MM-NEXT: sltu $11, $8, $5
+; MM-NEXT: or $9, $8, $zero
+; MM-NEXT: movz $9, $5, $11
+; MM-NEXT: and $9, $9, $4
+; MM-NEXT: and $10, $8, $6
+; MM-NEXT: or $10, $10, $9
+; MM-NEXT: sc $10, 0($2)
+; MM-NEXT: beqzc $10, $BB7_1
; MM-NEXT: # %bb.2: # %entry
-; MM-NEXT: and $1, $2, $8
-; MM-NEXT: srlv $1, $1, $10
-; MM-NEXT: seh $1, $1
+; MM-NEXT: and $7, $8, $4
+; MM-NEXT: srlv $7, $7, $3
+; MM-NEXT: seh $7, $7
; MM-NEXT: # %bb.3: # %entry
-; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MM-NEXT: # %bb.4: # %entry
-; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: sync
+; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: addiusp 8
; MM-NEXT: jrc $ra
;
@@ -2389,38 +2389,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: addiu $sp, $sp, -8
; MMR6-NEXT: .cfi_def_cfa_offset 8
-; MMR6-NEXT: # kill: def $at killed $a1
+; MMR6-NEXT: move $1, $5
; MMR6-NEXT: sync
-; MMR6-NEXT: addiu $1, $zero, -4
-; MMR6-NEXT: and $6, $4, $1
-; MMR6-NEXT: andi $1, $4, 3
-; MMR6-NEXT: xori $1, $1, 2
-; MMR6-NEXT: sll $10, $1, 3
-; MMR6-NEXT: ori $1, $zero, 65535
-; MMR6-NEXT: sllv $8, $1, $10
-; MMR6-NEXT: nor $9, $zero, $8
-; MMR6-NEXT: sllv $7, $5, $10
+; MMR6-NEXT: addiu $2, $zero, -4
+; MMR6-NEXT: and $2, $4, $2
+; MMR6-NEXT: andi $3, $4, 3
+; MMR6-NEXT: xori $3, $3, 2
+; MMR6-NEXT: sll $3, $3, 3
+; MMR6-NEXT: ori $4, $zero, 65535
+; MMR6-NEXT: sllv $4, $4, $3
+; MMR6-NEXT: nor $6, $zero, $4
+; MMR6-NEXT: sllv $5, $5, $3
; MMR6-NEXT: $BB7_1: # %entry
; MMR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMR6-NEXT: ll $2, 0($6)
-; MMR6-NEXT: sltu $5, $2, $7
-; MMR6-NEXT: selnez $3, $2, $5
-; MMR6-NEXT: seleqz $5, $7, $5
-; MMR6-NEXT: or $3, $3, $5
-; MMR6-NEXT: and $3, $3, $8
-; MMR6-NEXT: and $4, $2, $9
-; MMR6-NEXT: or $4, $4, $3
-; MMR6-NEXT: sc $4, 0($6)
-; MMR6-NEXT: beqc $4, $zero, $BB7_1
+; MMR6-NEXT: ll $8, 0($2)
+; MMR6-NEXT: sltu $11, $8, $5
+; MMR6-NEXT: selnez $9, $8, $11
+; MMR6-NEXT: seleqz $11, $5, $11
+; MMR6-NEXT: or $9, $9, $11
+; MMR6-NEXT: and $9, $9, $4
+; MMR6-NEXT: and $10, $8, $6
+; MMR6-NEXT: or $10, $10, $9
+; MMR6-NEXT: sc $10, 0($2)
+; MMR6-NEXT: beqc $10, $zero, $BB7_1
; MMR6-NEXT: # %bb.2: # %entry
-; MMR6-NEXT: and $1, $2, $8
-; MMR6-NEXT: srlv $1, $1, $10
-; MMR6-NEXT: seh $1, $1
+; MMR6-NEXT: and $7, $8, $4
+; MMR6-NEXT: srlv $7, $7, $3
+; MMR6-NEXT: seh $7, $7
; MMR6-NEXT: # %bb.3: # %entry
-; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMR6-NEXT: # %bb.4: # %entry
-; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: sync
+; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: addiu $sp, $sp, 8
; MMR6-NEXT: jrc $ra
;
@@ -2428,39 +2428,39 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSEL: # %bb.0: # %entry
; MIPSEL-NEXT: addiu $sp, $sp, -8
; MIPSEL-NEXT: .cfi_def_cfa_offset 8
-; MIPSEL-NEXT: # kill: def $at killed $a1
+; MIPSEL-NEXT: move $1, $5
; MIPSEL-NEXT: sync
-; MIPSEL-NEXT: addiu $1, $zero, -4
-; MIPSEL-NEXT: and $6, $4, $1
-; MIPSEL-NEXT: andi $1, $4, 3
-; MIPSEL-NEXT: sll $10, $1, 3
-; MIPSEL-NEXT: ori $1, $zero, 65535
-; MIPSEL-NEXT: sllv $8, $1, $10
-; MIPSEL-NEXT: nor $9, $zero, $8
-; MIPSEL-NEXT: sllv $7, $5, $10
+; MIPSEL-NEXT: addiu $2, $zero, -4
+; MIPSEL-NEXT: and $2, $4, $2
+; MIPSEL-NEXT: andi $3, $4, 3
+; MIPSEL-NEXT: sll $3, $3, 3
+; MIPSEL-NEXT: ori $4, $zero, 65535
+; MIPSEL-NEXT: sllv $4, $4, $3
+; MIPSEL-NEXT: nor $6, $zero, $4
+; MIPSEL-NEXT: sllv $5, $5, $3
; MIPSEL-NEXT: $BB7_1: # %entry
; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSEL-NEXT: ll $2, 0($6)
-; MIPSEL-NEXT: and $2, $2, $8
-; MIPSEL-NEXT: and $7, $7, $8
-; MIPSEL-NEXT: sltu $5, $2, $7
-; MIPSEL-NEXT: move $3, $2
-; MIPSEL-NEXT: movz $3, $7, $5
-; MIPSEL-NEXT: and $3, $3, $8
-; MIPSEL-NEXT: and $4, $2, $9
-; MIPSEL-NEXT: or $4, $4, $3
-; MIPSEL-NEXT: sc $4, 0($6)
-; MIPSEL-NEXT: beqz $4, $BB7_1
+; MIPSEL-NEXT: ll $8, 0($2)
+; MIPSEL-NEXT: and $8, $8, $4
+; MIPSEL-NEXT: and $5, $5, $4
+; MIPSEL-NEXT: sltu $11, $8, $5
+; MIPSEL-NEXT: move $9, $8
+; MIPSEL-NEXT: movz $9, $5, $11
+; MIPSEL-NEXT: and $9, $9, $4
+; MIPSEL-NEXT: and $10, $8, $6
+; MIPSEL-NEXT: or $10, $10, $9
+; MIPSEL-NEXT: sc $10, 0($2)
+; MIPSEL-NEXT: beqz $10, $BB7_1
; MIPSEL-NEXT: nop
; MIPSEL-NEXT: # %bb.2: # %entry
-; MIPSEL-NEXT: and $1, $2, $8
-; MIPSEL-NEXT: srlv $1, $1, $10
-; MIPSEL-NEXT: seh $1, $1
+; MIPSEL-NEXT: and $7, $8, $4
+; MIPSEL-NEXT: srlv $7, $7, $3
+; MIPSEL-NEXT: seh $7, $7
; MIPSEL-NEXT: # %bb.3: # %entry
-; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSEL-NEXT: # %bb.4: # %entry
-; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: sync
+; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: addiu $sp, $sp, 8
; MIPSEL-NEXT: jr $ra
; MIPSEL-NEXT: nop
@@ -2469,39 +2469,39 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPSELR6: # %bb.0: # %entry
; MIPSELR6-NEXT: addiu $sp, $sp, -8
; MIPSELR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSELR6-NEXT: # kill: def $at killed $a1
+; MIPSELR6-NEXT: move $1, $5
; MIPSELR6-NEXT: sync
-; MIPSELR6-NEXT: addiu $1, $zero, -4
-; MIPSELR6-NEXT: and $6, $4, $1
-; MIPSELR6-NEXT: andi $1, $4, 3
-; MIPSELR6-NEXT: sll $10, $1, 3
-; MIPSELR6-NEXT: ori $1, $zero, 65535
-; MIPSELR6-NEXT: sllv $8, $1, $10
-; MIPSELR6-NEXT: nor $9, $zero, $8
-; MIPSELR6-NEXT: sllv $7, $5, $10
+; MIPSELR6-NEXT: addiu $2, $zero, -4
+; MIPSELR6-NEXT: and $2, $4, $2
+; MIPSELR6-NEXT: andi $3, $4, 3
+; MIPSELR6-NEXT: sll $3, $3, 3
+; MIPSELR6-NEXT: ori $4, $zero, 65535
+; MIPSELR6-NEXT: sllv $4, $4, $3
+; MIPSELR6-NEXT: nor $6, $zero, $4
+; MIPSELR6-NEXT: sllv $5, $5, $3
; MIPSELR6-NEXT: $BB7_1: # %entry
; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSELR6-NEXT: ll $2, 0($6)
-; MIPSELR6-NEXT: and $2, $2, $8
-; MIPSELR6-NEXT: and $7, $7, $8
-; MIPSELR6-NEXT: sltu $5, $2, $7
-; MIPSELR6-NEXT: selnez $3, $2, $5
-; MIPSELR6-NEXT: seleqz $5, $7, $5
-; MIPSELR6-NEXT: or $3, $3, $5
-; MIPSELR6-NEXT: and $3, $3, $8
-; MIPSELR6-NEXT: and $4, $2, $9
-; MIPSELR6-NEXT: or $4, $4, $3
-; MIPSELR6-NEXT: sc $4, 0($6)
-; MIPSELR6-NEXT: beqzc $4, $BB7_1
+; MIPSELR6-NEXT: ll $8, 0($2)
+; MIPSELR6-NEXT: and $8, $8, $4
+; MIPSELR6-NEXT: and $5, $5, $4
+; MIPSELR6-NEXT: sltu $11, $8, $5
+; MIPSELR6-NEXT: selnez $9, $8, $11
+; MIPSELR6-NEXT: seleqz $11, $5, $11
+; MIPSELR6-NEXT: or $9, $9, $11
+; MIPSELR6-NEXT: and $9, $9, $4
+; MIPSELR6-NEXT: and $10, $8, $6
+; MIPSELR6-NEXT: or $10, $10, $9
+; MIPSELR6-NEXT: sc $10, 0($2)
+; MIPSELR6-NEXT: beqzc $10, $BB7_1
; MIPSELR6-NEXT: # %bb.2: # %entry
-; MIPSELR6-NEXT: and $1, $2, $8
-; MIPSELR6-NEXT: srlv $1, $1, $10
-; MIPSELR6-NEXT: seh $1, $1
+; MIPSELR6-NEXT: and $7, $8, $4
+; MIPSELR6-NEXT: srlv $7, $7, $3
+; MIPSELR6-NEXT: seh $7, $7
; MIPSELR6-NEXT: # %bb.3: # %entry
-; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSELR6-NEXT: # %bb.4: # %entry
-; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: sync
+; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: addiu $sp, $sp, 8
; MIPSELR6-NEXT: jrc $ra
;
@@ -2509,38 +2509,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MMEL: # %bb.0: # %entry
; MMEL-NEXT: addiu $sp, $sp, -8
; MMEL-NEXT: .cfi_def_cfa_offset 8
-; MMEL-NEXT: # kill: def $at killed $a1
+; MMEL-NEXT: move $1, $5
; MMEL-NEXT: sync
-; MMEL-NEXT: addiu $1, $zero, -4
-; MMEL-NEXT: and $6, $4, $1
-; MMEL-NEXT: andi $1, $4, 3
-; MMEL-NEXT: sll $10, $1, 3
-; MMEL-NEXT: ori $1, $zero, 65535
-; MMEL-NEXT: sllv $8, $1, $10
-; MMEL-NEXT: nor $9, $zero, $8
-; MMEL-NEXT: sllv $7, $5, $10
+; MMEL-NEXT: addiu $2, $zero, -4
+; MMEL-NEXT: and $2, $4, $2
+; MMEL-NEXT: andi $3, $4, 3
+; MMEL-NEXT: sll $3, $3, 3
+; MMEL-NEXT: ori $4, $zero, 65535
+; MMEL-NEXT: sllv $4, $4, $3
+; MMEL-NEXT: nor $6, $zero, $4
+; MMEL-NEXT: sllv $5, $5, $3
; MMEL-NEXT: $BB7_1: # %entry
; MMEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MMEL-NEXT: ll $2, 0($6)
-; MMEL-NEXT: and $2, $2, $8
-; MMEL-NEXT: and $7, $7, $8
-; MMEL-NEXT: sltu $5, $2, $7
-; MMEL-NEXT: or $3, $2, $zero
-; MMEL-NEXT: movz $3, $7, $5
-; MMEL-NEXT: and $3, $3, $8
-; MMEL-NEXT: and $4, $2, $9
-; MMEL-NEXT: or $4, $4, $3
-; MMEL-NEXT: sc $4, 0($6)
-; MMEL-NEXT: beqzc $4, $BB7_1
+; MMEL-NEXT: ll $8, 0($2)
+; MMEL-NEXT: and $8, $8, $4
+; MMEL-NEXT: and $5, $5, $4
+; MMEL-NEXT: sltu $11, $8, $5
+; MMEL-NEXT: or $9, $8, $zero
+; MMEL-NEXT: movz $9, $5, $11
+; MMEL-NEXT: and $9, $9, $4
+; MMEL-NEXT: and $10, $8, $6
+; MMEL-NEXT: or $10, $10, $9
+; MMEL-NEXT: sc $10, 0($2)
+; MMEL-NEXT: beqzc $10, $BB7_1
; MMEL-NEXT: # %bb.2: # %entry
-; MMEL-NEXT: and $1, $2, $8
-; MMEL-NEXT: srlv $1, $1, $10
-; MMEL-NEXT: seh $1, $1
+; MMEL-NEXT: and $7, $8, $4
+; MMEL-NEXT: srlv $7, $7, $3
+; MMEL-NEXT: seh $7, $7
; MMEL-NEXT: # %bb.3: # %entry
-; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMEL-NEXT: # %bb.4: # %entry
-; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: sync
+; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: addiusp 8
; MMEL-NEXT: jrc $ra
;
@@ -2548,39 +2548,39 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MMELR6: # %bb.0: # %entry
; MMELR6-NEXT: addiu $sp, $sp, -8
; MMELR6-NEXT: .cfi_def_cfa_offset 8
-; MMELR6-NEXT: # kill: def $at killed $a1
+; MMELR6-NEXT: move $1, $5
; MMELR6-NEXT: sync
-; MMELR6-NEXT: addiu $1, $zero, -4
-; MMELR6-NEXT: and $6, $4, $1
-; MMELR6-NEXT: andi $1, $4, 3
-; MMELR6-NEXT: sll $10, $1, 3
-; MMELR6-NEXT: ori $1, $zero, 65535
-; MMELR6-NEXT: sllv $8, $1, $10
-; MMELR6-NEXT: nor $9, $zero, $8
-; MMELR6-NEXT: sllv $7, $5, $10
+; MMELR6-NEXT: addiu $2, $zero, -4
+; MMELR6-NEXT: and $2, $4, $2
+; MMELR6-NEXT: andi $3, $4, 3
+; MMELR6-NEXT: sll $3, $3, 3
+; MMELR6-NEXT: ori $4, $zero, 65535
+; MMELR6-NEXT: sllv $4, $4, $3
+; MMELR6-NEXT: nor $6, $zero, $4
+; MMELR6-NEXT: sllv $5, $5, $3
; MMELR6-NEXT: $BB7_1: # %entry
; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMELR6-NEXT: ll $2, 0($6)
-; MMELR6-NEXT: and $2, $2, $8
-; MMELR6-NEXT: and $7, $7, $8
-; MMELR6-NEXT: sltu $5, $2, $7
-; MMELR6-NEXT: selnez $3, $2, $5
-; MMELR6-NEXT: seleqz $5, $7, $5
-; MMELR6-NEXT: or $3, $3, $5
-; MMELR6-NEXT: and $3, $3, $8
-; MMELR6-NEXT: and $4, $2, $9
-; MMELR6-NEXT: or $4, $4, $3
-; MMELR6-NEXT: sc $4, 0($6)
-; MMELR6-NEXT: beqc $4, $zero, $BB7_1
+; MMELR6-NEXT: ll $8, 0($2)
+; MMELR6-NEXT: and $8, $8, $4
+; MMELR6-NEXT: and $5, $5, $4
+; MMELR6-NEXT: sltu $11, $8, $5
+; MMELR6-NEXT: selnez $9, $8, $11
+; MMELR6-NEXT: seleqz $11, $5, $11
+; MMELR6-NEXT: or $9, $9, $11
+; MMELR6-NEXT: and $9, $9, $4
+; MMELR6-NEXT: and $10, $8, $6
+; MMELR6-NEXT: or $10, $10, $9
+; MMELR6-NEXT: sc $10, 0($2)
+; MMELR6-NEXT: beqc $10, $zero, $BB7_1
; MMELR6-NEXT: # %bb.2: # %entry
-; MMELR6-NEXT: and $1, $2, $8
-; MMELR6-NEXT: srlv $1, $1, $10
-; MMELR6-NEXT: seh $1, $1
+; MMELR6-NEXT: and $7, $8, $4
+; MMELR6-NEXT: srlv $7, $7, $3
+; MMELR6-NEXT: seh $7, $7
; MMELR6-NEXT: # %bb.3: # %entry
-; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMELR6-NEXT: # %bb.4: # %entry
-; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: sync
+; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: addiu $sp, $sp, 8
; MMELR6-NEXT: jrc $ra
;
@@ -2588,38 +2588,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $sp, $sp, -16
; MIPS64-NEXT: .cfi_def_cfa_offset 16
-; MIPS64-NEXT: move $1, $5
+; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64-NEXT: sync
-; MIPS64-NEXT: daddiu $2, $zero, -4
-; MIPS64-NEXT: and $6, $4, $2
+; MIPS64-NEXT: daddiu $1, $zero, -4
+; MIPS64-NEXT: and $1, $4, $1
; MIPS64-NEXT: andi $2, $4, 3
; MIPS64-NEXT: xori $2, $2, 2
-; MIPS64-NEXT: sll $10, $2, 3
-; MIPS64-NEXT: ori $2, $zero, 65535
-; MIPS64-NEXT: sllv $8, $2, $10
-; MIPS64-NEXT: nor $9, $zero, $8
-; MIPS64-NEXT: sllv $7, $1, $10
+; MIPS64-NEXT: sll $2, $2, 3
+; MIPS64-NEXT: ori $3, $zero, 65535
+; MIPS64-NEXT: sllv $3, $3, $2
+; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB7_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $2, 0($6)
-; MIPS64-NEXT: sltu $5, $2, $7
-; MIPS64-NEXT: move $3, $2
-; MIPS64-NEXT: movz $3, $7, $5
-; MIPS64-NEXT: and $3, $3, $8
-; MIPS64-NEXT: and $4, $2, $9
-; MIPS64-NEXT: or $4, $4, $3
-; MIPS64-NEXT: sc $4, 0($6)
-; MIPS64-NEXT: beqz $4, .LBB7_1
+; MIPS64-NEXT: ll $7, 0($1)
+; MIPS64-NEXT: sltu $10, $7, $5
+; MIPS64-NEXT: move $8, $7
+; MIPS64-NEXT: movz $8, $5, $10
+; MIPS64-NEXT: and $8, $8, $3
+; MIPS64-NEXT: and $9, $7, $4
+; MIPS64-NEXT: or $9, $9, $8
+; MIPS64-NEXT: sc $9, 0($1)
+; MIPS64-NEXT: beqz $9, .LBB7_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $1, $2, $8
-; MIPS64-NEXT: srlv $1, $1, $10
-; MIPS64-NEXT: seh $1, $1
+; MIPS64-NEXT: and $6, $7, $3
+; MIPS64-NEXT: srlv $6, $6, $2
+; MIPS64-NEXT: seh $6, $6
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
-; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sync
+; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: daddiu $sp, $sp, 16
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
@@ -2628,38 +2628,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R6-NEXT: move $1, $5
+; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6-NEXT: sync
-; MIPS64R6-NEXT: daddiu $2, $zero, -4
-; MIPS64R6-NEXT: and $6, $4, $2
+; MIPS64R6-NEXT: daddiu $1, $zero, -4
+; MIPS64R6-NEXT: and $1, $4, $1
; MIPS64R6-NEXT: andi $2, $4, 3
; MIPS64R6-NEXT: xori $2, $2, 2
-; MIPS64R6-NEXT: sll $10, $2, 3
-; MIPS64R6-NEXT: ori $2, $zero, 65535
-; MIPS64R6-NEXT: sllv $8, $2, $10
-; MIPS64R6-NEXT: nor $9, $zero, $8
-; MIPS64R6-NEXT: sllv $7, $1, $10
+; MIPS64R6-NEXT: sll $2, $2, 3
+; MIPS64R6-NEXT: ori $3, $zero, 65535
+; MIPS64R6-NEXT: sllv $3, $3, $2
+; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB7_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $2, 0($6)
-; MIPS64R6-NEXT: sltu $5, $2, $7
-; MIPS64R6-NEXT: selnez $3, $2, $5
-; MIPS64R6-NEXT: seleqz $5, $7, $5
-; MIPS64R6-NEXT: or $3, $3, $5
-; MIPS64R6-NEXT: and $3, $3, $8
-; MIPS64R6-NEXT: and $4, $2, $9
-; MIPS64R6-NEXT: or $4, $4, $3
-; MIPS64R6-NEXT: sc $4, 0($6)
-; MIPS64R6-NEXT: beqzc $4, .LBB7_1
+; MIPS64R6-NEXT: ll $7, 0($1)
+; MIPS64R6-NEXT: sltu $10, $7, $5
+; MIPS64R6-NEXT: selnez $8, $7, $10
+; MIPS64R6-NEXT: seleqz $10, $5, $10
+; MIPS64R6-NEXT: or $8, $8, $10
+; MIPS64R6-NEXT: and $8, $8, $3
+; MIPS64R6-NEXT: and $9, $7, $4
+; MIPS64R6-NEXT: or $9, $9, $8
+; MIPS64R6-NEXT: sc $9, 0($1)
+; MIPS64R6-NEXT: beqzc $9, .LBB7_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $1, $2, $8
-; MIPS64R6-NEXT: srlv $1, $1, $10
-; MIPS64R6-NEXT: seh $1, $1
+; MIPS64R6-NEXT: and $6, $7, $3
+; MIPS64R6-NEXT: srlv $6, $6, $2
+; MIPS64R6-NEXT: seh $6, $6
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
-; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sync
+; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: daddiu $sp, $sp, 16
; MIPS64R6-NEXT: jrc $ra
;
@@ -2667,39 +2667,39 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64EL: # %bb.0: # %entry
; MIPS64EL-NEXT: daddiu $sp, $sp, -16
; MIPS64EL-NEXT: .cfi_def_cfa_offset 16
-; MIPS64EL-NEXT: move $1, $5
+; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64EL-NEXT: sync
-; MIPS64EL-NEXT: daddiu $2, $zero, -4
-; MIPS64EL-NEXT: and $6, $4, $2
+; MIPS64EL-NEXT: daddiu $1, $zero, -4
+; MIPS64EL-NEXT: and $1, $4, $1
; MIPS64EL-NEXT: andi $2, $4, 3
-; MIPS64EL-NEXT: sll $10, $2, 3
-; MIPS64EL-NEXT: ori $2, $zero, 65535
-; MIPS64EL-NEXT: sllv $8, $2, $10
-; MIPS64EL-NEXT: nor $9, $zero, $8
-; MIPS64EL-NEXT: sllv $7, $1, $10
+; MIPS64EL-NEXT: sll $2, $2, 3
+; MIPS64EL-NEXT: ori $3, $zero, 65535
+; MIPS64EL-NEXT: sllv $3, $3, $2
+; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB7_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $2, 0($6)
-; MIPS64EL-NEXT: and $2, $2, $8
-; MIPS64EL-NEXT: and $7, $7, $8
-; MIPS64EL-NEXT: sltu $5, $2, $7
-; MIPS64EL-NEXT: move $3, $2
-; MIPS64EL-NEXT: movz $3, $7, $5
-; MIPS64EL-NEXT: and $3, $3, $8
-; MIPS64EL-NEXT: and $4, $2, $9
-; MIPS64EL-NEXT: or $4, $4, $3
-; MIPS64EL-NEXT: sc $4, 0($6)
-; MIPS64EL-NEXT: beqz $4, .LBB7_1
+; MIPS64EL-NEXT: ll $7, 0($1)
+; MIPS64EL-NEXT: and $7, $7, $3
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $10, $7, $5
+; MIPS64EL-NEXT: move $8, $7
+; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: and $8, $8, $3
+; MIPS64EL-NEXT: and $9, $7, $4
+; MIPS64EL-NEXT: or $9, $9, $8
+; MIPS64EL-NEXT: sc $9, 0($1)
+; MIPS64EL-NEXT: beqz $9, .LBB7_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $1, $2, $8
-; MIPS64EL-NEXT: srlv $1, $1, $10
-; MIPS64EL-NEXT: seh $1, $1
+; MIPS64EL-NEXT: and $6, $7, $3
+; MIPS64EL-NEXT: srlv $6, $6, $2
+; MIPS64EL-NEXT: seh $6, $6
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
-; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sync
+; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
@@ -2708,39 +2708,39 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64ELR6: # %bb.0: # %entry
; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16
; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64ELR6-NEXT: move $1, $5
+; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64ELR6-NEXT: sync
-; MIPS64ELR6-NEXT: daddiu $2, $zero, -4
-; MIPS64ELR6-NEXT: and $6, $4, $2
+; MIPS64ELR6-NEXT: daddiu $1, $zero, -4
+; MIPS64ELR6-NEXT: and $1, $4, $1
; MIPS64ELR6-NEXT: andi $2, $4, 3
-; MIPS64ELR6-NEXT: sll $10, $2, 3
-; MIPS64ELR6-NEXT: ori $2, $zero, 65535
-; MIPS64ELR6-NEXT: sllv $8, $2, $10
-; MIPS64ELR6-NEXT: nor $9, $zero, $8
-; MIPS64ELR6-NEXT: sllv $7, $1, $10
+; MIPS64ELR6-NEXT: sll $2, $2, 3
+; MIPS64ELR6-NEXT: ori $3, $zero, 65535
+; MIPS64ELR6-NEXT: sllv $3, $3, $2
+; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB7_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $2, 0($6)
-; MIPS64ELR6-NEXT: and $2, $2, $8
-; MIPS64ELR6-NEXT: and $7, $7, $8
-; MIPS64ELR6-NEXT: sltu $5, $2, $7
-; MIPS64ELR6-NEXT: selnez $3, $2, $5
-; MIPS64ELR6-NEXT: seleqz $5, $7, $5
-; MIPS64ELR6-NEXT: or $3, $3, $5
-; MIPS64ELR6-NEXT: and $3, $3, $8
-; MIPS64ELR6-NEXT: and $4, $2, $9
-; MIPS64ELR6-NEXT: or $4, $4, $3
-; MIPS64ELR6-NEXT: sc $4, 0($6)
-; MIPS64ELR6-NEXT: beqzc $4, .LBB7_1
+; MIPS64ELR6-NEXT: ll $7, 0($1)
+; MIPS64ELR6-NEXT: and $7, $7, $3
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $10, $7, $5
+; MIPS64ELR6-NEXT: selnez $8, $7, $10
+; MIPS64ELR6-NEXT: seleqz $10, $5, $10
+; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: and $8, $8, $3
+; MIPS64ELR6-NEXT: and $9, $7, $4
+; MIPS64ELR6-NEXT: or $9, $9, $8
+; MIPS64ELR6-NEXT: sc $9, 0($1)
+; MIPS64ELR6-NEXT: beqzc $9, .LBB7_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $1, $2, $8
-; MIPS64ELR6-NEXT: srlv $1, $1, $10
-; MIPS64ELR6-NEXT: seh $1, $1
+; MIPS64ELR6-NEXT: and $6, $7, $3
+; MIPS64ELR6-NEXT: srlv $6, $6, $2
+; MIPS64ELR6-NEXT: seh $6, $6
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
-; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sync
+; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16
; MIPS64ELR6-NEXT: jrc $ra
entry:
@@ -2754,38 +2754,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: addiu $sp, $sp, -8
; MIPS-NEXT: .cfi_def_cfa_offset 8
-; MIPS-NEXT: # kill: def $at killed $a1
+; MIPS-NEXT: move $1, $5
; MIPS-NEXT: sync
-; MIPS-NEXT: addiu $1, $zero, -4
-; MIPS-NEXT: and $6, $4, $1
-; MIPS-NEXT: andi $1, $4, 3
-; MIPS-NEXT: xori $1, $1, 3
-; MIPS-NEXT: sll $10, $1, 3
-; MIPS-NEXT: ori $1, $zero, 255
-; MIPS-NEXT: sllv $8, $1, $10
-; MIPS-NEXT: nor $9, $zero, $8
-; MIPS-NEXT: sllv $7, $5, $10
+; MIPS-NEXT: addiu $2, $zero, -4
+; MIPS-NEXT: and $2, $4, $2
+; MIPS-NEXT: andi $3, $4, 3
+; MIPS-NEXT: xori $3, $3, 3
+; MIPS-NEXT: sll $3, $3, 3
+; MIPS-NEXT: ori $4, $zero, 255
+; MIPS-NEXT: sllv $4, $4, $3
+; MIPS-NEXT: nor $6, $zero, $4
+; MIPS-NEXT: sllv $5, $5, $3
; MIPS-NEXT: $BB8_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS-NEXT: ll $2, 0($6)
-; MIPS-NEXT: slt $5, $2, $7
-; MIPS-NEXT: move $3, $2
-; MIPS-NEXT: movn $3, $7, $5
-; MIPS-NEXT: and $3, $3, $8
-; MIPS-NEXT: and $4, $2, $9
-; MIPS-NEXT: or $4, $4, $3
-; MIPS-NEXT: sc $4, 0($6)
-; MIPS-NEXT: beqz $4, $BB8_1
+; MIPS-NEXT: ll $8, 0($2)
+; MIPS-NEXT: slt $11, $8, $5
+; MIPS-NEXT: move $9, $8
+; MIPS-NEXT: movn $9, $5, $11
+; MIPS-NEXT: and $9, $9, $4
+; MIPS-NEXT: and $10, $8, $6
+; MIPS-NEXT: or $10, $10, $9
+; MIPS-NEXT: sc $10, 0($2)
+; MIPS-NEXT: beqz $10, $BB8_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
-; MIPS-NEXT: and $1, $2, $8
-; MIPS-NEXT: srlv $1, $1, $10
-; MIPS-NEXT: seh $1, $1
+; MIPS-NEXT: and $7, $8, $4
+; MIPS-NEXT: srlv $7, $7, $3
+; MIPS-NEXT: seh $7, $7
; MIPS-NEXT: # %bb.3: # %entry
-; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS-NEXT: # %bb.4: # %entry
-; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: sync
+; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: addiu $sp, $sp, 8
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
@@ -2794,38 +2794,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: addiu $sp, $sp, -8
; MIPSR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSR6-NEXT: # kill: def $at killed $a1
+; MIPSR6-NEXT: move $1, $5
; MIPSR6-NEXT: sync
-; MIPSR6-NEXT: addiu $1, $zero, -4
-; MIPSR6-NEXT: and $6, $4, $1
-; MIPSR6-NEXT: andi $1, $4, 3
-; MIPSR6-NEXT: xori $1, $1, 3
-; MIPSR6-NEXT: sll $10, $1, 3
-; MIPSR6-NEXT: ori $1, $zero, 255
-; MIPSR6-NEXT: sllv $8, $1, $10
-; MIPSR6-NEXT: nor $9, $zero, $8
-; MIPSR6-NEXT: sllv $7, $5, $10
+; MIPSR6-NEXT: addiu $2, $zero, -4
+; MIPSR6-NEXT: and $2, $4, $2
+; MIPSR6-NEXT: andi $3, $4, 3
+; MIPSR6-NEXT: xori $3, $3, 3
+; MIPSR6-NEXT: sll $3, $3, 3
+; MIPSR6-NEXT: ori $4, $zero, 255
+; MIPSR6-NEXT: sllv $4, $4, $3
+; MIPSR6-NEXT: nor $6, $zero, $4
+; MIPSR6-NEXT: sllv $5, $5, $3
; MIPSR6-NEXT: $BB8_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSR6-NEXT: ll $2, 0($6)
-; MIPSR6-NEXT: slt $5, $2, $7
-; MIPSR6-NEXT: seleqz $3, $2, $5
-; MIPSR6-NEXT: selnez $5, $7, $5
-; MIPSR6-NEXT: or $3, $3, $5
-; MIPSR6-NEXT: and $3, $3, $8
-; MIPSR6-NEXT: and $4, $2, $9
-; MIPSR6-NEXT: or $4, $4, $3
-; MIPSR6-NEXT: sc $4, 0($6)
-; MIPSR6-NEXT: beqzc $4, $BB8_1
+; MIPSR6-NEXT: ll $8, 0($2)
+; MIPSR6-NEXT: slt $11, $8, $5
+; MIPSR6-NEXT: seleqz $9, $8, $11
+; MIPSR6-NEXT: selnez $11, $5, $11
+; MIPSR6-NEXT: or $9, $9, $11
+; MIPSR6-NEXT: and $9, $9, $4
+; MIPSR6-NEXT: and $10, $8, $6
+; MIPSR6-NEXT: or $10, $10, $9
+; MIPSR6-NEXT: sc $10, 0($2)
+; MIPSR6-NEXT: beqzc $10, $BB8_1
; MIPSR6-NEXT: # %bb.2: # %entry
-; MIPSR6-NEXT: and $1, $2, $8
-; MIPSR6-NEXT: srlv $1, $1, $10
-; MIPSR6-NEXT: seh $1, $1
+; MIPSR6-NEXT: and $7, $8, $4
+; MIPSR6-NEXT: srlv $7, $7, $3
+; MIPSR6-NEXT: seh $7, $7
; MIPSR6-NEXT: # %bb.3: # %entry
-; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSR6-NEXT: # %bb.4: # %entry
-; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: sync
+; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: addiu $sp, $sp, 8
; MIPSR6-NEXT: jrc $ra
;
@@ -2833,37 +2833,37 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MM: # %bb.0: # %entry
; MM-NEXT: addiu $sp, $sp, -8
; MM-NEXT: .cfi_def_cfa_offset 8
-; MM-NEXT: # kill: def $at killed $a1
+; MM-NEXT: move $1, $5
; MM-NEXT: sync
-; MM-NEXT: addiu $1, $zero, -4
-; MM-NEXT: and $6, $4, $1
-; MM-NEXT: andi $1, $4, 3
-; MM-NEXT: xori $1, $1, 3
-; MM-NEXT: sll $10, $1, 3
-; MM-NEXT: ori $1, $zero, 255
-; MM-NEXT: sllv $8, $1, $10
-; MM-NEXT: nor $9, $zero, $8
-; MM-NEXT: sllv $7, $5, $10
+; MM-NEXT: addiu $2, $zero, -4
+; MM-NEXT: and $2, $4, $2
+; MM-NEXT: andi $3, $4, 3
+; MM-NEXT: xori $3, $3, 3
+; MM-NEXT: sll $3, $3, 3
+; MM-NEXT: ori $4, $zero, 255
+; MM-NEXT: sllv $4, $4, $3
+; MM-NEXT: nor $6, $zero, $4
+; MM-NEXT: sllv $5, $5, $3
; MM-NEXT: $BB8_1: # %entry
; MM-NEXT: # =>This Inner Loop Header: Depth=1
-; MM-NEXT: ll $2, 0($6)
-; MM-NEXT: slt $5, $2, $7
-; MM-NEXT: or $3, $2, $zero
-; MM-NEXT: movn $3, $7, $5
-; MM-NEXT: and $3, $3, $8
-; MM-NEXT: and $4, $2, $9
-; MM-NEXT: or $4, $4, $3
-; MM-NEXT: sc $4, 0($6)
-; MM-NEXT: beqzc $4, $BB8_1
+; MM-NEXT: ll $8, 0($2)
+; MM-NEXT: slt $11, $8, $5
+; MM-NEXT: or $9, $8, $zero
+; MM-NEXT: movn $9, $5, $11
+; MM-NEXT: and $9, $9, $4
+; MM-NEXT: and $10, $8, $6
+; MM-NEXT: or $10, $10, $9
+; MM-NEXT: sc $10, 0($2)
+; MM-NEXT: beqzc $10, $BB8_1
; MM-NEXT: # %bb.2: # %entry
-; MM-NEXT: and $1, $2, $8
-; MM-NEXT: srlv $1, $1, $10
-; MM-NEXT: seh $1, $1
+; MM-NEXT: and $7, $8, $4
+; MM-NEXT: srlv $7, $7, $3
+; MM-NEXT: seh $7, $7
; MM-NEXT: # %bb.3: # %entry
-; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MM-NEXT: # %bb.4: # %entry
-; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: sync
+; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: addiusp 8
; MM-NEXT: jrc $ra
;
@@ -2871,38 +2871,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: addiu $sp, $sp, -8
; MMR6-NEXT: .cfi_def_cfa_offset 8
-; MMR6-NEXT: # kill: def $at killed $a1
+; MMR6-NEXT: move $1, $5
; MMR6-NEXT: sync
-; MMR6-NEXT: addiu $1, $zero, -4
-; MMR6-NEXT: and $6, $4, $1
-; MMR6-NEXT: andi $1, $4, 3
-; MMR6-NEXT: xori $1, $1, 3
-; MMR6-NEXT: sll $10, $1, 3
-; MMR6-NEXT: ori $1, $zero, 255
-; MMR6-NEXT: sllv $8, $1, $10
-; MMR6-NEXT: nor $9, $zero, $8
-; MMR6-NEXT: sllv $7, $5, $10
+; MMR6-NEXT: addiu $2, $zero, -4
+; MMR6-NEXT: and $2, $4, $2
+; MMR6-NEXT: andi $3, $4, 3
+; MMR6-NEXT: xori $3, $3, 3
+; MMR6-NEXT: sll $3, $3, 3
+; MMR6-NEXT: ori $4, $zero, 255
+; MMR6-NEXT: sllv $4, $4, $3
+; MMR6-NEXT: nor $6, $zero, $4
+; MMR6-NEXT: sllv $5, $5, $3
; MMR6-NEXT: $BB8_1: # %entry
; MMR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMR6-NEXT: ll $2, 0($6)
-; MMR6-NEXT: slt $5, $2, $7
-; MMR6-NEXT: seleqz $3, $2, $5
-; MMR6-NEXT: selnez $5, $7, $5
-; MMR6-NEXT: or $3, $3, $5
-; MMR6-NEXT: and $3, $3, $8
-; MMR6-NEXT: and $4, $2, $9
-; MMR6-NEXT: or $4, $4, $3
-; MMR6-NEXT: sc $4, 0($6)
-; MMR6-NEXT: beqc $4, $zero, $BB8_1
+; MMR6-NEXT: ll $8, 0($2)
+; MMR6-NEXT: slt $11, $8, $5
+; MMR6-NEXT: seleqz $9, $8, $11
+; MMR6-NEXT: selnez $11, $5, $11
+; MMR6-NEXT: or $9, $9, $11
+; MMR6-NEXT: and $9, $9, $4
+; MMR6-NEXT: and $10, $8, $6
+; MMR6-NEXT: or $10, $10, $9
+; MMR6-NEXT: sc $10, 0($2)
+; MMR6-NEXT: beqc $10, $zero, $BB8_1
; MMR6-NEXT: # %bb.2: # %entry
-; MMR6-NEXT: and $1, $2, $8
-; MMR6-NEXT: srlv $1, $1, $10
-; MMR6-NEXT: seh $1, $1
+; MMR6-NEXT: and $7, $8, $4
+; MMR6-NEXT: srlv $7, $7, $3
+; MMR6-NEXT: seh $7, $7
; MMR6-NEXT: # %bb.3: # %entry
-; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMR6-NEXT: # %bb.4: # %entry
-; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: sync
+; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: addiu $sp, $sp, 8
; MMR6-NEXT: jrc $ra
;
@@ -2910,39 +2910,39 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSEL: # %bb.0: # %entry
; MIPSEL-NEXT: addiu $sp, $sp, -8
; MIPSEL-NEXT: .cfi_def_cfa_offset 8
-; MIPSEL-NEXT: # kill: def $at killed $a1
+; MIPSEL-NEXT: move $1, $5
; MIPSEL-NEXT: sync
-; MIPSEL-NEXT: addiu $1, $zero, -4
-; MIPSEL-NEXT: and $6, $4, $1
-; MIPSEL-NEXT: andi $1, $4, 3
-; MIPSEL-NEXT: sll $10, $1, 3
-; MIPSEL-NEXT: ori $1, $zero, 255
-; MIPSEL-NEXT: sllv $8, $1, $10
-; MIPSEL-NEXT: nor $9, $zero, $8
-; MIPSEL-NEXT: sllv $7, $5, $10
+; MIPSEL-NEXT: addiu $2, $zero, -4
+; MIPSEL-NEXT: and $2, $4, $2
+; MIPSEL-NEXT: andi $3, $4, 3
+; MIPSEL-NEXT: sll $3, $3, 3
+; MIPSEL-NEXT: ori $4, $zero, 255
+; MIPSEL-NEXT: sllv $4, $4, $3
+; MIPSEL-NEXT: nor $6, $zero, $4
+; MIPSEL-NEXT: sllv $5, $5, $3
; MIPSEL-NEXT: $BB8_1: # %entry
; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSEL-NEXT: ll $2, 0($6)
-; MIPSEL-NEXT: and $2, $2, $8
-; MIPSEL-NEXT: and $7, $7, $8
-; MIPSEL-NEXT: slt $5, $2, $7
-; MIPSEL-NEXT: move $3, $2
-; MIPSEL-NEXT: movn $3, $7, $5
-; MIPSEL-NEXT: and $3, $3, $8
-; MIPSEL-NEXT: and $4, $2, $9
-; MIPSEL-NEXT: or $4, $4, $3
-; MIPSEL-NEXT: sc $4, 0($6)
-; MIPSEL-NEXT: beqz $4, $BB8_1
+; MIPSEL-NEXT: ll $8, 0($2)
+; MIPSEL-NEXT: and $8, $8, $4
+; MIPSEL-NEXT: and $5, $5, $4
+; MIPSEL-NEXT: slt $11, $8, $5
+; MIPSEL-NEXT: move $9, $8
+; MIPSEL-NEXT: movn $9, $5, $11
+; MIPSEL-NEXT: and $9, $9, $4
+; MIPSEL-NEXT: and $10, $8, $6
+; MIPSEL-NEXT: or $10, $10, $9
+; MIPSEL-NEXT: sc $10, 0($2)
+; MIPSEL-NEXT: beqz $10, $BB8_1
; MIPSEL-NEXT: nop
; MIPSEL-NEXT: # %bb.2: # %entry
-; MIPSEL-NEXT: and $1, $2, $8
-; MIPSEL-NEXT: srlv $1, $1, $10
-; MIPSEL-NEXT: seh $1, $1
+; MIPSEL-NEXT: and $7, $8, $4
+; MIPSEL-NEXT: srlv $7, $7, $3
+; MIPSEL-NEXT: seh $7, $7
; MIPSEL-NEXT: # %bb.3: # %entry
-; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSEL-NEXT: # %bb.4: # %entry
-; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: sync
+; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: addiu $sp, $sp, 8
; MIPSEL-NEXT: jr $ra
; MIPSEL-NEXT: nop
@@ -2951,39 +2951,39 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSELR6: # %bb.0: # %entry
; MIPSELR6-NEXT: addiu $sp, $sp, -8
; MIPSELR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSELR6-NEXT: # kill: def $at killed $a1
+; MIPSELR6-NEXT: move $1, $5
; MIPSELR6-NEXT: sync
-; MIPSELR6-NEXT: addiu $1, $zero, -4
-; MIPSELR6-NEXT: and $6, $4, $1
-; MIPSELR6-NEXT: andi $1, $4, 3
-; MIPSELR6-NEXT: sll $10, $1, 3
-; MIPSELR6-NEXT: ori $1, $zero, 255
-; MIPSELR6-NEXT: sllv $8, $1, $10
-; MIPSELR6-NEXT: nor $9, $zero, $8
-; MIPSELR6-NEXT: sllv $7, $5, $10
+; MIPSELR6-NEXT: addiu $2, $zero, -4
+; MIPSELR6-NEXT: and $2, $4, $2
+; MIPSELR6-NEXT: andi $3, $4, 3
+; MIPSELR6-NEXT: sll $3, $3, 3
+; MIPSELR6-NEXT: ori $4, $zero, 255
+; MIPSELR6-NEXT: sllv $4, $4, $3
+; MIPSELR6-NEXT: nor $6, $zero, $4
+; MIPSELR6-NEXT: sllv $5, $5, $3
; MIPSELR6-NEXT: $BB8_1: # %entry
; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSELR6-NEXT: ll $2, 0($6)
-; MIPSELR6-NEXT: and $2, $2, $8
-; MIPSELR6-NEXT: and $7, $7, $8
-; MIPSELR6-NEXT: slt $5, $2, $7
-; MIPSELR6-NEXT: seleqz $3, $2, $5
-; MIPSELR6-NEXT: selnez $5, $7, $5
-; MIPSELR6-NEXT: or $3, $3, $5
-; MIPSELR6-NEXT: and $3, $3, $8
-; MIPSELR6-NEXT: and $4, $2, $9
-; MIPSELR6-NEXT: or $4, $4, $3
-; MIPSELR6-NEXT: sc $4, 0($6)
-; MIPSELR6-NEXT: beqzc $4, $BB8_1
+; MIPSELR6-NEXT: ll $8, 0($2)
+; MIPSELR6-NEXT: and $8, $8, $4
+; MIPSELR6-NEXT: and $5, $5, $4
+; MIPSELR6-NEXT: slt $11, $8, $5
+; MIPSELR6-NEXT: seleqz $9, $8, $11
+; MIPSELR6-NEXT: selnez $11, $5, $11
+; MIPSELR6-NEXT: or $9, $9, $11
+; MIPSELR6-NEXT: and $9, $9, $4
+; MIPSELR6-NEXT: and $10, $8, $6
+; MIPSELR6-NEXT: or $10, $10, $9
+; MIPSELR6-NEXT: sc $10, 0($2)
+; MIPSELR6-NEXT: beqzc $10, $BB8_1
; MIPSELR6-NEXT: # %bb.2: # %entry
-; MIPSELR6-NEXT: and $1, $2, $8
-; MIPSELR6-NEXT: srlv $1, $1, $10
-; MIPSELR6-NEXT: seh $1, $1
+; MIPSELR6-NEXT: and $7, $8, $4
+; MIPSELR6-NEXT: srlv $7, $7, $3
+; MIPSELR6-NEXT: seh $7, $7
; MIPSELR6-NEXT: # %bb.3: # %entry
-; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSELR6-NEXT: # %bb.4: # %entry
-; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: sync
+; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: addiu $sp, $sp, 8
; MIPSELR6-NEXT: jrc $ra
;
@@ -2991,38 +2991,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MMEL: # %bb.0: # %entry
; MMEL-NEXT: addiu $sp, $sp, -8
; MMEL-NEXT: .cfi_def_cfa_offset 8
-; MMEL-NEXT: # kill: def $at killed $a1
+; MMEL-NEXT: move $1, $5
; MMEL-NEXT: sync
-; MMEL-NEXT: addiu $1, $zero, -4
-; MMEL-NEXT: and $6, $4, $1
-; MMEL-NEXT: andi $1, $4, 3
-; MMEL-NEXT: sll $10, $1, 3
-; MMEL-NEXT: ori $1, $zero, 255
-; MMEL-NEXT: sllv $8, $1, $10
-; MMEL-NEXT: nor $9, $zero, $8
-; MMEL-NEXT: sllv $7, $5, $10
+; MMEL-NEXT: addiu $2, $zero, -4
+; MMEL-NEXT: and $2, $4, $2
+; MMEL-NEXT: andi $3, $4, 3
+; MMEL-NEXT: sll $3, $3, 3
+; MMEL-NEXT: ori $4, $zero, 255
+; MMEL-NEXT: sllv $4, $4, $3
+; MMEL-NEXT: nor $6, $zero, $4
+; MMEL-NEXT: sllv $5, $5, $3
; MMEL-NEXT: $BB8_1: # %entry
; MMEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MMEL-NEXT: ll $2, 0($6)
-; MMEL-NEXT: and $2, $2, $8
-; MMEL-NEXT: and $7, $7, $8
-; MMEL-NEXT: slt $5, $2, $7
-; MMEL-NEXT: or $3, $2, $zero
-; MMEL-NEXT: movn $3, $7, $5
-; MMEL-NEXT: and $3, $3, $8
-; MMEL-NEXT: and $4, $2, $9
-; MMEL-NEXT: or $4, $4, $3
-; MMEL-NEXT: sc $4, 0($6)
-; MMEL-NEXT: beqzc $4, $BB8_1
+; MMEL-NEXT: ll $8, 0($2)
+; MMEL-NEXT: and $8, $8, $4
+; MMEL-NEXT: and $5, $5, $4
+; MMEL-NEXT: slt $11, $8, $5
+; MMEL-NEXT: or $9, $8, $zero
+; MMEL-NEXT: movn $9, $5, $11
+; MMEL-NEXT: and $9, $9, $4
+; MMEL-NEXT: and $10, $8, $6
+; MMEL-NEXT: or $10, $10, $9
+; MMEL-NEXT: sc $10, 0($2)
+; MMEL-NEXT: beqzc $10, $BB8_1
; MMEL-NEXT: # %bb.2: # %entry
-; MMEL-NEXT: and $1, $2, $8
-; MMEL-NEXT: srlv $1, $1, $10
-; MMEL-NEXT: seh $1, $1
+; MMEL-NEXT: and $7, $8, $4
+; MMEL-NEXT: srlv $7, $7, $3
+; MMEL-NEXT: seh $7, $7
; MMEL-NEXT: # %bb.3: # %entry
-; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMEL-NEXT: # %bb.4: # %entry
-; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: sync
+; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: addiusp 8
; MMEL-NEXT: jrc $ra
;
@@ -3030,39 +3030,39 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MMELR6: # %bb.0: # %entry
; MMELR6-NEXT: addiu $sp, $sp, -8
; MMELR6-NEXT: .cfi_def_cfa_offset 8
-; MMELR6-NEXT: # kill: def $at killed $a1
+; MMELR6-NEXT: move $1, $5
; MMELR6-NEXT: sync
-; MMELR6-NEXT: addiu $1, $zero, -4
-; MMELR6-NEXT: and $6, $4, $1
-; MMELR6-NEXT: andi $1, $4, 3
-; MMELR6-NEXT: sll $10, $1, 3
-; MMELR6-NEXT: ori $1, $zero, 255
-; MMELR6-NEXT: sllv $8, $1, $10
-; MMELR6-NEXT: nor $9, $zero, $8
-; MMELR6-NEXT: sllv $7, $5, $10
+; MMELR6-NEXT: addiu $2, $zero, -4
+; MMELR6-NEXT: and $2, $4, $2
+; MMELR6-NEXT: andi $3, $4, 3
+; MMELR6-NEXT: sll $3, $3, 3
+; MMELR6-NEXT: ori $4, $zero, 255
+; MMELR6-NEXT: sllv $4, $4, $3
+; MMELR6-NEXT: nor $6, $zero, $4
+; MMELR6-NEXT: sllv $5, $5, $3
; MMELR6-NEXT: $BB8_1: # %entry
; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMELR6-NEXT: ll $2, 0($6)
-; MMELR6-NEXT: and $2, $2, $8
-; MMELR6-NEXT: and $7, $7, $8
-; MMELR6-NEXT: slt $5, $2, $7
-; MMELR6-NEXT: seleqz $3, $2, $5
-; MMELR6-NEXT: selnez $5, $7, $5
-; MMELR6-NEXT: or $3, $3, $5
-; MMELR6-NEXT: and $3, $3, $8
-; MMELR6-NEXT: and $4, $2, $9
-; MMELR6-NEXT: or $4, $4, $3
-; MMELR6-NEXT: sc $4, 0($6)
-; MMELR6-NEXT: beqc $4, $zero, $BB8_1
+; MMELR6-NEXT: ll $8, 0($2)
+; MMELR6-NEXT: and $8, $8, $4
+; MMELR6-NEXT: and $5, $5, $4
+; MMELR6-NEXT: slt $11, $8, $5
+; MMELR6-NEXT: seleqz $9, $8, $11
+; MMELR6-NEXT: selnez $11, $5, $11
+; MMELR6-NEXT: or $9, $9, $11
+; MMELR6-NEXT: and $9, $9, $4
+; MMELR6-NEXT: and $10, $8, $6
+; MMELR6-NEXT: or $10, $10, $9
+; MMELR6-NEXT: sc $10, 0($2)
+; MMELR6-NEXT: beqc $10, $zero, $BB8_1
; MMELR6-NEXT: # %bb.2: # %entry
-; MMELR6-NEXT: and $1, $2, $8
-; MMELR6-NEXT: srlv $1, $1, $10
-; MMELR6-NEXT: seh $1, $1
+; MMELR6-NEXT: and $7, $8, $4
+; MMELR6-NEXT: srlv $7, $7, $3
+; MMELR6-NEXT: seh $7, $7
; MMELR6-NEXT: # %bb.3: # %entry
-; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMELR6-NEXT: # %bb.4: # %entry
-; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: sync
+; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: addiu $sp, $sp, 8
; MMELR6-NEXT: jrc $ra
;
@@ -3070,38 +3070,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $sp, $sp, -16
; MIPS64-NEXT: .cfi_def_cfa_offset 16
-; MIPS64-NEXT: move $1, $5
+; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64-NEXT: sync
-; MIPS64-NEXT: daddiu $2, $zero, -4
-; MIPS64-NEXT: and $6, $4, $2
+; MIPS64-NEXT: daddiu $1, $zero, -4
+; MIPS64-NEXT: and $1, $4, $1
; MIPS64-NEXT: andi $2, $4, 3
; MIPS64-NEXT: xori $2, $2, 3
-; MIPS64-NEXT: sll $10, $2, 3
-; MIPS64-NEXT: ori $2, $zero, 255
-; MIPS64-NEXT: sllv $8, $2, $10
-; MIPS64-NEXT: nor $9, $zero, $8
-; MIPS64-NEXT: sllv $7, $1, $10
+; MIPS64-NEXT: sll $2, $2, 3
+; MIPS64-NEXT: ori $3, $zero, 255
+; MIPS64-NEXT: sllv $3, $3, $2
+; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB8_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $2, 0($6)
-; MIPS64-NEXT: slt $5, $2, $7
-; MIPS64-NEXT: move $3, $2
-; MIPS64-NEXT: movn $3, $7, $5
-; MIPS64-NEXT: and $3, $3, $8
-; MIPS64-NEXT: and $4, $2, $9
-; MIPS64-NEXT: or $4, $4, $3
-; MIPS64-NEXT: sc $4, 0($6)
-; MIPS64-NEXT: beqz $4, .LBB8_1
+; MIPS64-NEXT: ll $7, 0($1)
+; MIPS64-NEXT: slt $10, $7, $5
+; MIPS64-NEXT: move $8, $7
+; MIPS64-NEXT: movn $8, $5, $10
+; MIPS64-NEXT: and $8, $8, $3
+; MIPS64-NEXT: and $9, $7, $4
+; MIPS64-NEXT: or $9, $9, $8
+; MIPS64-NEXT: sc $9, 0($1)
+; MIPS64-NEXT: beqz $9, .LBB8_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $1, $2, $8
-; MIPS64-NEXT: srlv $1, $1, $10
-; MIPS64-NEXT: seh $1, $1
+; MIPS64-NEXT: and $6, $7, $3
+; MIPS64-NEXT: srlv $6, $6, $2
+; MIPS64-NEXT: seh $6, $6
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
-; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sync
+; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: daddiu $sp, $sp, 16
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
@@ -3110,38 +3110,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R6-NEXT: move $1, $5
+; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6-NEXT: sync
-; MIPS64R6-NEXT: daddiu $2, $zero, -4
-; MIPS64R6-NEXT: and $6, $4, $2
+; MIPS64R6-NEXT: daddiu $1, $zero, -4
+; MIPS64R6-NEXT: and $1, $4, $1
; MIPS64R6-NEXT: andi $2, $4, 3
; MIPS64R6-NEXT: xori $2, $2, 3
-; MIPS64R6-NEXT: sll $10, $2, 3
-; MIPS64R6-NEXT: ori $2, $zero, 255
-; MIPS64R6-NEXT: sllv $8, $2, $10
-; MIPS64R6-NEXT: nor $9, $zero, $8
-; MIPS64R6-NEXT: sllv $7, $1, $10
+; MIPS64R6-NEXT: sll $2, $2, 3
+; MIPS64R6-NEXT: ori $3, $zero, 255
+; MIPS64R6-NEXT: sllv $3, $3, $2
+; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB8_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $2, 0($6)
-; MIPS64R6-NEXT: slt $5, $2, $7
-; MIPS64R6-NEXT: seleqz $3, $2, $5
-; MIPS64R6-NEXT: selnez $5, $7, $5
-; MIPS64R6-NEXT: or $3, $3, $5
-; MIPS64R6-NEXT: and $3, $3, $8
-; MIPS64R6-NEXT: and $4, $2, $9
-; MIPS64R6-NEXT: or $4, $4, $3
-; MIPS64R6-NEXT: sc $4, 0($6)
-; MIPS64R6-NEXT: beqzc $4, .LBB8_1
+; MIPS64R6-NEXT: ll $7, 0($1)
+; MIPS64R6-NEXT: slt $10, $7, $5
+; MIPS64R6-NEXT: seleqz $8, $7, $10
+; MIPS64R6-NEXT: selnez $10, $5, $10
+; MIPS64R6-NEXT: or $8, $8, $10
+; MIPS64R6-NEXT: and $8, $8, $3
+; MIPS64R6-NEXT: and $9, $7, $4
+; MIPS64R6-NEXT: or $9, $9, $8
+; MIPS64R6-NEXT: sc $9, 0($1)
+; MIPS64R6-NEXT: beqzc $9, .LBB8_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $1, $2, $8
-; MIPS64R6-NEXT: srlv $1, $1, $10
-; MIPS64R6-NEXT: seh $1, $1
+; MIPS64R6-NEXT: and $6, $7, $3
+; MIPS64R6-NEXT: srlv $6, $6, $2
+; MIPS64R6-NEXT: seh $6, $6
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
-; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sync
+; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: daddiu $sp, $sp, 16
; MIPS64R6-NEXT: jrc $ra
;
@@ -3149,39 +3149,39 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64EL: # %bb.0: # %entry
; MIPS64EL-NEXT: daddiu $sp, $sp, -16
; MIPS64EL-NEXT: .cfi_def_cfa_offset 16
-; MIPS64EL-NEXT: move $1, $5
+; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64EL-NEXT: sync
-; MIPS64EL-NEXT: daddiu $2, $zero, -4
-; MIPS64EL-NEXT: and $6, $4, $2
+; MIPS64EL-NEXT: daddiu $1, $zero, -4
+; MIPS64EL-NEXT: and $1, $4, $1
; MIPS64EL-NEXT: andi $2, $4, 3
-; MIPS64EL-NEXT: sll $10, $2, 3
-; MIPS64EL-NEXT: ori $2, $zero, 255
-; MIPS64EL-NEXT: sllv $8, $2, $10
-; MIPS64EL-NEXT: nor $9, $zero, $8
-; MIPS64EL-NEXT: sllv $7, $1, $10
+; MIPS64EL-NEXT: sll $2, $2, 3
+; MIPS64EL-NEXT: ori $3, $zero, 255
+; MIPS64EL-NEXT: sllv $3, $3, $2
+; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB8_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $2, 0($6)
-; MIPS64EL-NEXT: and $2, $2, $8
-; MIPS64EL-NEXT: and $7, $7, $8
-; MIPS64EL-NEXT: slt $5, $2, $7
-; MIPS64EL-NEXT: move $3, $2
-; MIPS64EL-NEXT: movn $3, $7, $5
-; MIPS64EL-NEXT: and $3, $3, $8
-; MIPS64EL-NEXT: and $4, $2, $9
-; MIPS64EL-NEXT: or $4, $4, $3
-; MIPS64EL-NEXT: sc $4, 0($6)
-; MIPS64EL-NEXT: beqz $4, .LBB8_1
+; MIPS64EL-NEXT: ll $7, 0($1)
+; MIPS64EL-NEXT: and $7, $7, $3
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $10, $7, $5
+; MIPS64EL-NEXT: move $8, $7
+; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: and $8, $8, $3
+; MIPS64EL-NEXT: and $9, $7, $4
+; MIPS64EL-NEXT: or $9, $9, $8
+; MIPS64EL-NEXT: sc $9, 0($1)
+; MIPS64EL-NEXT: beqz $9, .LBB8_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $1, $2, $8
-; MIPS64EL-NEXT: srlv $1, $1, $10
-; MIPS64EL-NEXT: seh $1, $1
+; MIPS64EL-NEXT: and $6, $7, $3
+; MIPS64EL-NEXT: srlv $6, $6, $2
+; MIPS64EL-NEXT: seh $6, $6
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
-; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sync
+; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
@@ -3190,39 +3190,39 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64ELR6: # %bb.0: # %entry
; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16
; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64ELR6-NEXT: move $1, $5
+; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64ELR6-NEXT: sync
-; MIPS64ELR6-NEXT: daddiu $2, $zero, -4
-; MIPS64ELR6-NEXT: and $6, $4, $2
+; MIPS64ELR6-NEXT: daddiu $1, $zero, -4
+; MIPS64ELR6-NEXT: and $1, $4, $1
; MIPS64ELR6-NEXT: andi $2, $4, 3
-; MIPS64ELR6-NEXT: sll $10, $2, 3
-; MIPS64ELR6-NEXT: ori $2, $zero, 255
-; MIPS64ELR6-NEXT: sllv $8, $2, $10
-; MIPS64ELR6-NEXT: nor $9, $zero, $8
-; MIPS64ELR6-NEXT: sllv $7, $1, $10
+; MIPS64ELR6-NEXT: sll $2, $2, 3
+; MIPS64ELR6-NEXT: ori $3, $zero, 255
+; MIPS64ELR6-NEXT: sllv $3, $3, $2
+; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB8_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $2, 0($6)
-; MIPS64ELR6-NEXT: and $2, $2, $8
-; MIPS64ELR6-NEXT: and $7, $7, $8
-; MIPS64ELR6-NEXT: slt $5, $2, $7
-; MIPS64ELR6-NEXT: seleqz $3, $2, $5
-; MIPS64ELR6-NEXT: selnez $5, $7, $5
-; MIPS64ELR6-NEXT: or $3, $3, $5
-; MIPS64ELR6-NEXT: and $3, $3, $8
-; MIPS64ELR6-NEXT: and $4, $2, $9
-; MIPS64ELR6-NEXT: or $4, $4, $3
-; MIPS64ELR6-NEXT: sc $4, 0($6)
-; MIPS64ELR6-NEXT: beqzc $4, .LBB8_1
+; MIPS64ELR6-NEXT: ll $7, 0($1)
+; MIPS64ELR6-NEXT: and $7, $7, $3
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $10, $7, $5
+; MIPS64ELR6-NEXT: seleqz $8, $7, $10
+; MIPS64ELR6-NEXT: selnez $10, $5, $10
+; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: and $8, $8, $3
+; MIPS64ELR6-NEXT: and $9, $7, $4
+; MIPS64ELR6-NEXT: or $9, $9, $8
+; MIPS64ELR6-NEXT: sc $9, 0($1)
+; MIPS64ELR6-NEXT: beqzc $9, .LBB8_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $1, $2, $8
-; MIPS64ELR6-NEXT: srlv $1, $1, $10
-; MIPS64ELR6-NEXT: seh $1, $1
+; MIPS64ELR6-NEXT: and $6, $7, $3
+; MIPS64ELR6-NEXT: srlv $6, $6, $2
+; MIPS64ELR6-NEXT: seh $6, $6
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
-; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sync
+; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16
; MIPS64ELR6-NEXT: jrc $ra
entry:
@@ -3235,38 +3235,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: addiu $sp, $sp, -8
; MIPS-NEXT: .cfi_def_cfa_offset 8
-; MIPS-NEXT: # kill: def $at killed $a1
+; MIPS-NEXT: move $1, $5
; MIPS-NEXT: sync
-; MIPS-NEXT: addiu $1, $zero, -4
-; MIPS-NEXT: and $6, $4, $1
-; MIPS-NEXT: andi $1, $4, 3
-; MIPS-NEXT: xori $1, $1, 3
-; MIPS-NEXT: sll $10, $1, 3
-; MIPS-NEXT: ori $1, $zero, 255
-; MIPS-NEXT: sllv $8, $1, $10
-; MIPS-NEXT: nor $9, $zero, $8
-; MIPS-NEXT: sllv $7, $5, $10
+; MIPS-NEXT: addiu $2, $zero, -4
+; MIPS-NEXT: and $2, $4, $2
+; MIPS-NEXT: andi $3, $4, 3
+; MIPS-NEXT: xori $3, $3, 3
+; MIPS-NEXT: sll $3, $3, 3
+; MIPS-NEXT: ori $4, $zero, 255
+; MIPS-NEXT: sllv $4, $4, $3
+; MIPS-NEXT: nor $6, $zero, $4
+; MIPS-NEXT: sllv $5, $5, $3
; MIPS-NEXT: $BB9_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS-NEXT: ll $2, 0($6)
-; MIPS-NEXT: slt $5, $2, $7
-; MIPS-NEXT: move $3, $2
-; MIPS-NEXT: movz $3, $7, $5
-; MIPS-NEXT: and $3, $3, $8
-; MIPS-NEXT: and $4, $2, $9
-; MIPS-NEXT: or $4, $4, $3
-; MIPS-NEXT: sc $4, 0($6)
-; MIPS-NEXT: beqz $4, $BB9_1
+; MIPS-NEXT: ll $8, 0($2)
+; MIPS-NEXT: slt $11, $8, $5
+; MIPS-NEXT: move $9, $8
+; MIPS-NEXT: movz $9, $5, $11
+; MIPS-NEXT: and $9, $9, $4
+; MIPS-NEXT: and $10, $8, $6
+; MIPS-NEXT: or $10, $10, $9
+; MIPS-NEXT: sc $10, 0($2)
+; MIPS-NEXT: beqz $10, $BB9_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
-; MIPS-NEXT: and $1, $2, $8
-; MIPS-NEXT: srlv $1, $1, $10
-; MIPS-NEXT: seh $1, $1
+; MIPS-NEXT: and $7, $8, $4
+; MIPS-NEXT: srlv $7, $7, $3
+; MIPS-NEXT: seh $7, $7
; MIPS-NEXT: # %bb.3: # %entry
-; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS-NEXT: # %bb.4: # %entry
-; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: sync
+; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: addiu $sp, $sp, 8
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
@@ -3275,38 +3275,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: addiu $sp, $sp, -8
; MIPSR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSR6-NEXT: # kill: def $at killed $a1
+; MIPSR6-NEXT: move $1, $5
; MIPSR6-NEXT: sync
-; MIPSR6-NEXT: addiu $1, $zero, -4
-; MIPSR6-NEXT: and $6, $4, $1
-; MIPSR6-NEXT: andi $1, $4, 3
-; MIPSR6-NEXT: xori $1, $1, 3
-; MIPSR6-NEXT: sll $10, $1, 3
-; MIPSR6-NEXT: ori $1, $zero, 255
-; MIPSR6-NEXT: sllv $8, $1, $10
-; MIPSR6-NEXT: nor $9, $zero, $8
-; MIPSR6-NEXT: sllv $7, $5, $10
+; MIPSR6-NEXT: addiu $2, $zero, -4
+; MIPSR6-NEXT: and $2, $4, $2
+; MIPSR6-NEXT: andi $3, $4, 3
+; MIPSR6-NEXT: xori $3, $3, 3
+; MIPSR6-NEXT: sll $3, $3, 3
+; MIPSR6-NEXT: ori $4, $zero, 255
+; MIPSR6-NEXT: sllv $4, $4, $3
+; MIPSR6-NEXT: nor $6, $zero, $4
+; MIPSR6-NEXT: sllv $5, $5, $3
; MIPSR6-NEXT: $BB9_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSR6-NEXT: ll $2, 0($6)
-; MIPSR6-NEXT: slt $5, $2, $7
-; MIPSR6-NEXT: selnez $3, $2, $5
-; MIPSR6-NEXT: seleqz $5, $7, $5
-; MIPSR6-NEXT: or $3, $3, $5
-; MIPSR6-NEXT: and $3, $3, $8
-; MIPSR6-NEXT: and $4, $2, $9
-; MIPSR6-NEXT: or $4, $4, $3
-; MIPSR6-NEXT: sc $4, 0($6)
-; MIPSR6-NEXT: beqzc $4, $BB9_1
+; MIPSR6-NEXT: ll $8, 0($2)
+; MIPSR6-NEXT: slt $11, $8, $5
+; MIPSR6-NEXT: selnez $9, $8, $11
+; MIPSR6-NEXT: seleqz $11, $5, $11
+; MIPSR6-NEXT: or $9, $9, $11
+; MIPSR6-NEXT: and $9, $9, $4
+; MIPSR6-NEXT: and $10, $8, $6
+; MIPSR6-NEXT: or $10, $10, $9
+; MIPSR6-NEXT: sc $10, 0($2)
+; MIPSR6-NEXT: beqzc $10, $BB9_1
; MIPSR6-NEXT: # %bb.2: # %entry
-; MIPSR6-NEXT: and $1, $2, $8
-; MIPSR6-NEXT: srlv $1, $1, $10
-; MIPSR6-NEXT: seh $1, $1
+; MIPSR6-NEXT: and $7, $8, $4
+; MIPSR6-NEXT: srlv $7, $7, $3
+; MIPSR6-NEXT: seh $7, $7
; MIPSR6-NEXT: # %bb.3: # %entry
-; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSR6-NEXT: # %bb.4: # %entry
-; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: sync
+; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: addiu $sp, $sp, 8
; MIPSR6-NEXT: jrc $ra
;
@@ -3314,37 +3314,37 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MM: # %bb.0: # %entry
; MM-NEXT: addiu $sp, $sp, -8
; MM-NEXT: .cfi_def_cfa_offset 8
-; MM-NEXT: # kill: def $at killed $a1
+; MM-NEXT: move $1, $5
; MM-NEXT: sync
-; MM-NEXT: addiu $1, $zero, -4
-; MM-NEXT: and $6, $4, $1
-; MM-NEXT: andi $1, $4, 3
-; MM-NEXT: xori $1, $1, 3
-; MM-NEXT: sll $10, $1, 3
-; MM-NEXT: ori $1, $zero, 255
-; MM-NEXT: sllv $8, $1, $10
-; MM-NEXT: nor $9, $zero, $8
-; MM-NEXT: sllv $7, $5, $10
+; MM-NEXT: addiu $2, $zero, -4
+; MM-NEXT: and $2, $4, $2
+; MM-NEXT: andi $3, $4, 3
+; MM-NEXT: xori $3, $3, 3
+; MM-NEXT: sll $3, $3, 3
+; MM-NEXT: ori $4, $zero, 255
+; MM-NEXT: sllv $4, $4, $3
+; MM-NEXT: nor $6, $zero, $4
+; MM-NEXT: sllv $5, $5, $3
; MM-NEXT: $BB9_1: # %entry
; MM-NEXT: # =>This Inner Loop Header: Depth=1
-; MM-NEXT: ll $2, 0($6)
-; MM-NEXT: slt $5, $2, $7
-; MM-NEXT: or $3, $2, $zero
-; MM-NEXT: movz $3, $7, $5
-; MM-NEXT: and $3, $3, $8
-; MM-NEXT: and $4, $2, $9
-; MM-NEXT: or $4, $4, $3
-; MM-NEXT: sc $4, 0($6)
-; MM-NEXT: beqzc $4, $BB9_1
+; MM-NEXT: ll $8, 0($2)
+; MM-NEXT: slt $11, $8, $5
+; MM-NEXT: or $9, $8, $zero
+; MM-NEXT: movz $9, $5, $11
+; MM-NEXT: and $9, $9, $4
+; MM-NEXT: and $10, $8, $6
+; MM-NEXT: or $10, $10, $9
+; MM-NEXT: sc $10, 0($2)
+; MM-NEXT: beqzc $10, $BB9_1
; MM-NEXT: # %bb.2: # %entry
-; MM-NEXT: and $1, $2, $8
-; MM-NEXT: srlv $1, $1, $10
-; MM-NEXT: seh $1, $1
+; MM-NEXT: and $7, $8, $4
+; MM-NEXT: srlv $7, $7, $3
+; MM-NEXT: seh $7, $7
; MM-NEXT: # %bb.3: # %entry
-; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MM-NEXT: # %bb.4: # %entry
-; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: sync
+; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: addiusp 8
; MM-NEXT: jrc $ra
;
@@ -3352,38 +3352,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: addiu $sp, $sp, -8
; MMR6-NEXT: .cfi_def_cfa_offset 8
-; MMR6-NEXT: # kill: def $at killed $a1
+; MMR6-NEXT: move $1, $5
; MMR6-NEXT: sync
-; MMR6-NEXT: addiu $1, $zero, -4
-; MMR6-NEXT: and $6, $4, $1
-; MMR6-NEXT: andi $1, $4, 3
-; MMR6-NEXT: xori $1, $1, 3
-; MMR6-NEXT: sll $10, $1, 3
-; MMR6-NEXT: ori $1, $zero, 255
-; MMR6-NEXT: sllv $8, $1, $10
-; MMR6-NEXT: nor $9, $zero, $8
-; MMR6-NEXT: sllv $7, $5, $10
+; MMR6-NEXT: addiu $2, $zero, -4
+; MMR6-NEXT: and $2, $4, $2
+; MMR6-NEXT: andi $3, $4, 3
+; MMR6-NEXT: xori $3, $3, 3
+; MMR6-NEXT: sll $3, $3, 3
+; MMR6-NEXT: ori $4, $zero, 255
+; MMR6-NEXT: sllv $4, $4, $3
+; MMR6-NEXT: nor $6, $zero, $4
+; MMR6-NEXT: sllv $5, $5, $3
; MMR6-NEXT: $BB9_1: # %entry
; MMR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMR6-NEXT: ll $2, 0($6)
-; MMR6-NEXT: slt $5, $2, $7
-; MMR6-NEXT: selnez $3, $2, $5
-; MMR6-NEXT: seleqz $5, $7, $5
-; MMR6-NEXT: or $3, $3, $5
-; MMR6-NEXT: and $3, $3, $8
-; MMR6-NEXT: and $4, $2, $9
-; MMR6-NEXT: or $4, $4, $3
-; MMR6-NEXT: sc $4, 0($6)
-; MMR6-NEXT: beqc $4, $zero, $BB9_1
+; MMR6-NEXT: ll $8, 0($2)
+; MMR6-NEXT: slt $11, $8, $5
+; MMR6-NEXT: selnez $9, $8, $11
+; MMR6-NEXT: seleqz $11, $5, $11
+; MMR6-NEXT: or $9, $9, $11
+; MMR6-NEXT: and $9, $9, $4
+; MMR6-NEXT: and $10, $8, $6
+; MMR6-NEXT: or $10, $10, $9
+; MMR6-NEXT: sc $10, 0($2)
+; MMR6-NEXT: beqc $10, $zero, $BB9_1
; MMR6-NEXT: # %bb.2: # %entry
-; MMR6-NEXT: and $1, $2, $8
-; MMR6-NEXT: srlv $1, $1, $10
-; MMR6-NEXT: seh $1, $1
+; MMR6-NEXT: and $7, $8, $4
+; MMR6-NEXT: srlv $7, $7, $3
+; MMR6-NEXT: seh $7, $7
; MMR6-NEXT: # %bb.3: # %entry
-; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMR6-NEXT: # %bb.4: # %entry
-; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: sync
+; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: addiu $sp, $sp, 8
; MMR6-NEXT: jrc $ra
;
@@ -3391,39 +3391,39 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSEL: # %bb.0: # %entry
; MIPSEL-NEXT: addiu $sp, $sp, -8
; MIPSEL-NEXT: .cfi_def_cfa_offset 8
-; MIPSEL-NEXT: # kill: def $at killed $a1
+; MIPSEL-NEXT: move $1, $5
; MIPSEL-NEXT: sync
-; MIPSEL-NEXT: addiu $1, $zero, -4
-; MIPSEL-NEXT: and $6, $4, $1
-; MIPSEL-NEXT: andi $1, $4, 3
-; MIPSEL-NEXT: sll $10, $1, 3
-; MIPSEL-NEXT: ori $1, $zero, 255
-; MIPSEL-NEXT: sllv $8, $1, $10
-; MIPSEL-NEXT: nor $9, $zero, $8
-; MIPSEL-NEXT: sllv $7, $5, $10
+; MIPSEL-NEXT: addiu $2, $zero, -4
+; MIPSEL-NEXT: and $2, $4, $2
+; MIPSEL-NEXT: andi $3, $4, 3
+; MIPSEL-NEXT: sll $3, $3, 3
+; MIPSEL-NEXT: ori $4, $zero, 255
+; MIPSEL-NEXT: sllv $4, $4, $3
+; MIPSEL-NEXT: nor $6, $zero, $4
+; MIPSEL-NEXT: sllv $5, $5, $3
; MIPSEL-NEXT: $BB9_1: # %entry
; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSEL-NEXT: ll $2, 0($6)
-; MIPSEL-NEXT: and $2, $2, $8
-; MIPSEL-NEXT: and $7, $7, $8
-; MIPSEL-NEXT: slt $5, $2, $7
-; MIPSEL-NEXT: move $3, $2
-; MIPSEL-NEXT: movz $3, $7, $5
-; MIPSEL-NEXT: and $3, $3, $8
-; MIPSEL-NEXT: and $4, $2, $9
-; MIPSEL-NEXT: or $4, $4, $3
-; MIPSEL-NEXT: sc $4, 0($6)
-; MIPSEL-NEXT: beqz $4, $BB9_1
+; MIPSEL-NEXT: ll $8, 0($2)
+; MIPSEL-NEXT: and $8, $8, $4
+; MIPSEL-NEXT: and $5, $5, $4
+; MIPSEL-NEXT: slt $11, $8, $5
+; MIPSEL-NEXT: move $9, $8
+; MIPSEL-NEXT: movz $9, $5, $11
+; MIPSEL-NEXT: and $9, $9, $4
+; MIPSEL-NEXT: and $10, $8, $6
+; MIPSEL-NEXT: or $10, $10, $9
+; MIPSEL-NEXT: sc $10, 0($2)
+; MIPSEL-NEXT: beqz $10, $BB9_1
; MIPSEL-NEXT: nop
; MIPSEL-NEXT: # %bb.2: # %entry
-; MIPSEL-NEXT: and $1, $2, $8
-; MIPSEL-NEXT: srlv $1, $1, $10
-; MIPSEL-NEXT: seh $1, $1
+; MIPSEL-NEXT: and $7, $8, $4
+; MIPSEL-NEXT: srlv $7, $7, $3
+; MIPSEL-NEXT: seh $7, $7
; MIPSEL-NEXT: # %bb.3: # %entry
-; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSEL-NEXT: # %bb.4: # %entry
-; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: sync
+; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: addiu $sp, $sp, 8
; MIPSEL-NEXT: jr $ra
; MIPSEL-NEXT: nop
@@ -3432,39 +3432,39 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSELR6: # %bb.0: # %entry
; MIPSELR6-NEXT: addiu $sp, $sp, -8
; MIPSELR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSELR6-NEXT: # kill: def $at killed $a1
+; MIPSELR6-NEXT: move $1, $5
; MIPSELR6-NEXT: sync
-; MIPSELR6-NEXT: addiu $1, $zero, -4
-; MIPSELR6-NEXT: and $6, $4, $1
-; MIPSELR6-NEXT: andi $1, $4, 3
-; MIPSELR6-NEXT: sll $10, $1, 3
-; MIPSELR6-NEXT: ori $1, $zero, 255
-; MIPSELR6-NEXT: sllv $8, $1, $10
-; MIPSELR6-NEXT: nor $9, $zero, $8
-; MIPSELR6-NEXT: sllv $7, $5, $10
+; MIPSELR6-NEXT: addiu $2, $zero, -4
+; MIPSELR6-NEXT: and $2, $4, $2
+; MIPSELR6-NEXT: andi $3, $4, 3
+; MIPSELR6-NEXT: sll $3, $3, 3
+; MIPSELR6-NEXT: ori $4, $zero, 255
+; MIPSELR6-NEXT: sllv $4, $4, $3
+; MIPSELR6-NEXT: nor $6, $zero, $4
+; MIPSELR6-NEXT: sllv $5, $5, $3
; MIPSELR6-NEXT: $BB9_1: # %entry
; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSELR6-NEXT: ll $2, 0($6)
-; MIPSELR6-NEXT: and $2, $2, $8
-; MIPSELR6-NEXT: and $7, $7, $8
-; MIPSELR6-NEXT: slt $5, $2, $7
-; MIPSELR6-NEXT: selnez $3, $2, $5
-; MIPSELR6-NEXT: seleqz $5, $7, $5
-; MIPSELR6-NEXT: or $3, $3, $5
-; MIPSELR6-NEXT: and $3, $3, $8
-; MIPSELR6-NEXT: and $4, $2, $9
-; MIPSELR6-NEXT: or $4, $4, $3
-; MIPSELR6-NEXT: sc $4, 0($6)
-; MIPSELR6-NEXT: beqzc $4, $BB9_1
+; MIPSELR6-NEXT: ll $8, 0($2)
+; MIPSELR6-NEXT: and $8, $8, $4
+; MIPSELR6-NEXT: and $5, $5, $4
+; MIPSELR6-NEXT: slt $11, $8, $5
+; MIPSELR6-NEXT: selnez $9, $8, $11
+; MIPSELR6-NEXT: seleqz $11, $5, $11
+; MIPSELR6-NEXT: or $9, $9, $11
+; MIPSELR6-NEXT: and $9, $9, $4
+; MIPSELR6-NEXT: and $10, $8, $6
+; MIPSELR6-NEXT: or $10, $10, $9
+; MIPSELR6-NEXT: sc $10, 0($2)
+; MIPSELR6-NEXT: beqzc $10, $BB9_1
; MIPSELR6-NEXT: # %bb.2: # %entry
-; MIPSELR6-NEXT: and $1, $2, $8
-; MIPSELR6-NEXT: srlv $1, $1, $10
-; MIPSELR6-NEXT: seh $1, $1
+; MIPSELR6-NEXT: and $7, $8, $4
+; MIPSELR6-NEXT: srlv $7, $7, $3
+; MIPSELR6-NEXT: seh $7, $7
; MIPSELR6-NEXT: # %bb.3: # %entry
-; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSELR6-NEXT: # %bb.4: # %entry
-; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: sync
+; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: addiu $sp, $sp, 8
; MIPSELR6-NEXT: jrc $ra
;
@@ -3472,38 +3472,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MMEL: # %bb.0: # %entry
; MMEL-NEXT: addiu $sp, $sp, -8
; MMEL-NEXT: .cfi_def_cfa_offset 8
-; MMEL-NEXT: # kill: def $at killed $a1
+; MMEL-NEXT: move $1, $5
; MMEL-NEXT: sync
-; MMEL-NEXT: addiu $1, $zero, -4
-; MMEL-NEXT: and $6, $4, $1
-; MMEL-NEXT: andi $1, $4, 3
-; MMEL-NEXT: sll $10, $1, 3
-; MMEL-NEXT: ori $1, $zero, 255
-; MMEL-NEXT: sllv $8, $1, $10
-; MMEL-NEXT: nor $9, $zero, $8
-; MMEL-NEXT: sllv $7, $5, $10
+; MMEL-NEXT: addiu $2, $zero, -4
+; MMEL-NEXT: and $2, $4, $2
+; MMEL-NEXT: andi $3, $4, 3
+; MMEL-NEXT: sll $3, $3, 3
+; MMEL-NEXT: ori $4, $zero, 255
+; MMEL-NEXT: sllv $4, $4, $3
+; MMEL-NEXT: nor $6, $zero, $4
+; MMEL-NEXT: sllv $5, $5, $3
; MMEL-NEXT: $BB9_1: # %entry
; MMEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MMEL-NEXT: ll $2, 0($6)
-; MMEL-NEXT: and $2, $2, $8
-; MMEL-NEXT: and $7, $7, $8
-; MMEL-NEXT: slt $5, $2, $7
-; MMEL-NEXT: or $3, $2, $zero
-; MMEL-NEXT: movz $3, $7, $5
-; MMEL-NEXT: and $3, $3, $8
-; MMEL-NEXT: and $4, $2, $9
-; MMEL-NEXT: or $4, $4, $3
-; MMEL-NEXT: sc $4, 0($6)
-; MMEL-NEXT: beqzc $4, $BB9_1
+; MMEL-NEXT: ll $8, 0($2)
+; MMEL-NEXT: and $8, $8, $4
+; MMEL-NEXT: and $5, $5, $4
+; MMEL-NEXT: slt $11, $8, $5
+; MMEL-NEXT: or $9, $8, $zero
+; MMEL-NEXT: movz $9, $5, $11
+; MMEL-NEXT: and $9, $9, $4
+; MMEL-NEXT: and $10, $8, $6
+; MMEL-NEXT: or $10, $10, $9
+; MMEL-NEXT: sc $10, 0($2)
+; MMEL-NEXT: beqzc $10, $BB9_1
; MMEL-NEXT: # %bb.2: # %entry
-; MMEL-NEXT: and $1, $2, $8
-; MMEL-NEXT: srlv $1, $1, $10
-; MMEL-NEXT: seh $1, $1
+; MMEL-NEXT: and $7, $8, $4
+; MMEL-NEXT: srlv $7, $7, $3
+; MMEL-NEXT: seh $7, $7
; MMEL-NEXT: # %bb.3: # %entry
-; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMEL-NEXT: # %bb.4: # %entry
-; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: sync
+; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: addiusp 8
; MMEL-NEXT: jrc $ra
;
@@ -3511,39 +3511,39 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MMELR6: # %bb.0: # %entry
; MMELR6-NEXT: addiu $sp, $sp, -8
; MMELR6-NEXT: .cfi_def_cfa_offset 8
-; MMELR6-NEXT: # kill: def $at killed $a1
+; MMELR6-NEXT: move $1, $5
; MMELR6-NEXT: sync
-; MMELR6-NEXT: addiu $1, $zero, -4
-; MMELR6-NEXT: and $6, $4, $1
-; MMELR6-NEXT: andi $1, $4, 3
-; MMELR6-NEXT: sll $10, $1, 3
-; MMELR6-NEXT: ori $1, $zero, 255
-; MMELR6-NEXT: sllv $8, $1, $10
-; MMELR6-NEXT: nor $9, $zero, $8
-; MMELR6-NEXT: sllv $7, $5, $10
+; MMELR6-NEXT: addiu $2, $zero, -4
+; MMELR6-NEXT: and $2, $4, $2
+; MMELR6-NEXT: andi $3, $4, 3
+; MMELR6-NEXT: sll $3, $3, 3
+; MMELR6-NEXT: ori $4, $zero, 255
+; MMELR6-NEXT: sllv $4, $4, $3
+; MMELR6-NEXT: nor $6, $zero, $4
+; MMELR6-NEXT: sllv $5, $5, $3
; MMELR6-NEXT: $BB9_1: # %entry
; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMELR6-NEXT: ll $2, 0($6)
-; MMELR6-NEXT: and $2, $2, $8
-; MMELR6-NEXT: and $7, $7, $8
-; MMELR6-NEXT: slt $5, $2, $7
-; MMELR6-NEXT: selnez $3, $2, $5
-; MMELR6-NEXT: seleqz $5, $7, $5
-; MMELR6-NEXT: or $3, $3, $5
-; MMELR6-NEXT: and $3, $3, $8
-; MMELR6-NEXT: and $4, $2, $9
-; MMELR6-NEXT: or $4, $4, $3
-; MMELR6-NEXT: sc $4, 0($6)
-; MMELR6-NEXT: beqc $4, $zero, $BB9_1
+; MMELR6-NEXT: ll $8, 0($2)
+; MMELR6-NEXT: and $8, $8, $4
+; MMELR6-NEXT: and $5, $5, $4
+; MMELR6-NEXT: slt $11, $8, $5
+; MMELR6-NEXT: selnez $9, $8, $11
+; MMELR6-NEXT: seleqz $11, $5, $11
+; MMELR6-NEXT: or $9, $9, $11
+; MMELR6-NEXT: and $9, $9, $4
+; MMELR6-NEXT: and $10, $8, $6
+; MMELR6-NEXT: or $10, $10, $9
+; MMELR6-NEXT: sc $10, 0($2)
+; MMELR6-NEXT: beqc $10, $zero, $BB9_1
; MMELR6-NEXT: # %bb.2: # %entry
-; MMELR6-NEXT: and $1, $2, $8
-; MMELR6-NEXT: srlv $1, $1, $10
-; MMELR6-NEXT: seh $1, $1
+; MMELR6-NEXT: and $7, $8, $4
+; MMELR6-NEXT: srlv $7, $7, $3
+; MMELR6-NEXT: seh $7, $7
; MMELR6-NEXT: # %bb.3: # %entry
-; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMELR6-NEXT: # %bb.4: # %entry
-; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: sync
+; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: addiu $sp, $sp, 8
; MMELR6-NEXT: jrc $ra
;
@@ -3551,38 +3551,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $sp, $sp, -16
; MIPS64-NEXT: .cfi_def_cfa_offset 16
-; MIPS64-NEXT: move $1, $5
+; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64-NEXT: sync
-; MIPS64-NEXT: daddiu $2, $zero, -4
-; MIPS64-NEXT: and $6, $4, $2
+; MIPS64-NEXT: daddiu $1, $zero, -4
+; MIPS64-NEXT: and $1, $4, $1
; MIPS64-NEXT: andi $2, $4, 3
; MIPS64-NEXT: xori $2, $2, 3
-; MIPS64-NEXT: sll $10, $2, 3
-; MIPS64-NEXT: ori $2, $zero, 255
-; MIPS64-NEXT: sllv $8, $2, $10
-; MIPS64-NEXT: nor $9, $zero, $8
-; MIPS64-NEXT: sllv $7, $1, $10
+; MIPS64-NEXT: sll $2, $2, 3
+; MIPS64-NEXT: ori $3, $zero, 255
+; MIPS64-NEXT: sllv $3, $3, $2
+; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB9_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $2, 0($6)
-; MIPS64-NEXT: slt $5, $2, $7
-; MIPS64-NEXT: move $3, $2
-; MIPS64-NEXT: movz $3, $7, $5
-; MIPS64-NEXT: and $3, $3, $8
-; MIPS64-NEXT: and $4, $2, $9
-; MIPS64-NEXT: or $4, $4, $3
-; MIPS64-NEXT: sc $4, 0($6)
-; MIPS64-NEXT: beqz $4, .LBB9_1
+; MIPS64-NEXT: ll $7, 0($1)
+; MIPS64-NEXT: slt $10, $7, $5
+; MIPS64-NEXT: move $8, $7
+; MIPS64-NEXT: movz $8, $5, $10
+; MIPS64-NEXT: and $8, $8, $3
+; MIPS64-NEXT: and $9, $7, $4
+; MIPS64-NEXT: or $9, $9, $8
+; MIPS64-NEXT: sc $9, 0($1)
+; MIPS64-NEXT: beqz $9, .LBB9_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $1, $2, $8
-; MIPS64-NEXT: srlv $1, $1, $10
-; MIPS64-NEXT: seh $1, $1
+; MIPS64-NEXT: and $6, $7, $3
+; MIPS64-NEXT: srlv $6, $6, $2
+; MIPS64-NEXT: seh $6, $6
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
-; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sync
+; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: daddiu $sp, $sp, 16
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
@@ -3591,38 +3591,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R6-NEXT: move $1, $5
+; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6-NEXT: sync
-; MIPS64R6-NEXT: daddiu $2, $zero, -4
-; MIPS64R6-NEXT: and $6, $4, $2
+; MIPS64R6-NEXT: daddiu $1, $zero, -4
+; MIPS64R6-NEXT: and $1, $4, $1
; MIPS64R6-NEXT: andi $2, $4, 3
; MIPS64R6-NEXT: xori $2, $2, 3
-; MIPS64R6-NEXT: sll $10, $2, 3
-; MIPS64R6-NEXT: ori $2, $zero, 255
-; MIPS64R6-NEXT: sllv $8, $2, $10
-; MIPS64R6-NEXT: nor $9, $zero, $8
-; MIPS64R6-NEXT: sllv $7, $1, $10
+; MIPS64R6-NEXT: sll $2, $2, 3
+; MIPS64R6-NEXT: ori $3, $zero, 255
+; MIPS64R6-NEXT: sllv $3, $3, $2
+; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB9_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $2, 0($6)
-; MIPS64R6-NEXT: slt $5, $2, $7
-; MIPS64R6-NEXT: selnez $3, $2, $5
-; MIPS64R6-NEXT: seleqz $5, $7, $5
-; MIPS64R6-NEXT: or $3, $3, $5
-; MIPS64R6-NEXT: and $3, $3, $8
-; MIPS64R6-NEXT: and $4, $2, $9
-; MIPS64R6-NEXT: or $4, $4, $3
-; MIPS64R6-NEXT: sc $4, 0($6)
-; MIPS64R6-NEXT: beqzc $4, .LBB9_1
+; MIPS64R6-NEXT: ll $7, 0($1)
+; MIPS64R6-NEXT: slt $10, $7, $5
+; MIPS64R6-NEXT: selnez $8, $7, $10
+; MIPS64R6-NEXT: seleqz $10, $5, $10
+; MIPS64R6-NEXT: or $8, $8, $10
+; MIPS64R6-NEXT: and $8, $8, $3
+; MIPS64R6-NEXT: and $9, $7, $4
+; MIPS64R6-NEXT: or $9, $9, $8
+; MIPS64R6-NEXT: sc $9, 0($1)
+; MIPS64R6-NEXT: beqzc $9, .LBB9_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $1, $2, $8
-; MIPS64R6-NEXT: srlv $1, $1, $10
-; MIPS64R6-NEXT: seh $1, $1
+; MIPS64R6-NEXT: and $6, $7, $3
+; MIPS64R6-NEXT: srlv $6, $6, $2
+; MIPS64R6-NEXT: seh $6, $6
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
-; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sync
+; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: daddiu $sp, $sp, 16
; MIPS64R6-NEXT: jrc $ra
;
@@ -3630,39 +3630,39 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64EL: # %bb.0: # %entry
; MIPS64EL-NEXT: daddiu $sp, $sp, -16
; MIPS64EL-NEXT: .cfi_def_cfa_offset 16
-; MIPS64EL-NEXT: move $1, $5
+; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64EL-NEXT: sync
-; MIPS64EL-NEXT: daddiu $2, $zero, -4
-; MIPS64EL-NEXT: and $6, $4, $2
+; MIPS64EL-NEXT: daddiu $1, $zero, -4
+; MIPS64EL-NEXT: and $1, $4, $1
; MIPS64EL-NEXT: andi $2, $4, 3
-; MIPS64EL-NEXT: sll $10, $2, 3
-; MIPS64EL-NEXT: ori $2, $zero, 255
-; MIPS64EL-NEXT: sllv $8, $2, $10
-; MIPS64EL-NEXT: nor $9, $zero, $8
-; MIPS64EL-NEXT: sllv $7, $1, $10
+; MIPS64EL-NEXT: sll $2, $2, 3
+; MIPS64EL-NEXT: ori $3, $zero, 255
+; MIPS64EL-NEXT: sllv $3, $3, $2
+; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB9_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $2, 0($6)
-; MIPS64EL-NEXT: and $2, $2, $8
-; MIPS64EL-NEXT: and $7, $7, $8
-; MIPS64EL-NEXT: slt $5, $2, $7
-; MIPS64EL-NEXT: move $3, $2
-; MIPS64EL-NEXT: movz $3, $7, $5
-; MIPS64EL-NEXT: and $3, $3, $8
-; MIPS64EL-NEXT: and $4, $2, $9
-; MIPS64EL-NEXT: or $4, $4, $3
-; MIPS64EL-NEXT: sc $4, 0($6)
-; MIPS64EL-NEXT: beqz $4, .LBB9_1
+; MIPS64EL-NEXT: ll $7, 0($1)
+; MIPS64EL-NEXT: and $7, $7, $3
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $10, $7, $5
+; MIPS64EL-NEXT: move $8, $7
+; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: and $8, $8, $3
+; MIPS64EL-NEXT: and $9, $7, $4
+; MIPS64EL-NEXT: or $9, $9, $8
+; MIPS64EL-NEXT: sc $9, 0($1)
+; MIPS64EL-NEXT: beqz $9, .LBB9_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $1, $2, $8
-; MIPS64EL-NEXT: srlv $1, $1, $10
-; MIPS64EL-NEXT: seh $1, $1
+; MIPS64EL-NEXT: and $6, $7, $3
+; MIPS64EL-NEXT: srlv $6, $6, $2
+; MIPS64EL-NEXT: seh $6, $6
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
-; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sync
+; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
@@ -3671,39 +3671,39 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64ELR6: # %bb.0: # %entry
; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16
; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64ELR6-NEXT: move $1, $5
+; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64ELR6-NEXT: sync
-; MIPS64ELR6-NEXT: daddiu $2, $zero, -4
-; MIPS64ELR6-NEXT: and $6, $4, $2
+; MIPS64ELR6-NEXT: daddiu $1, $zero, -4
+; MIPS64ELR6-NEXT: and $1, $4, $1
; MIPS64ELR6-NEXT: andi $2, $4, 3
-; MIPS64ELR6-NEXT: sll $10, $2, 3
-; MIPS64ELR6-NEXT: ori $2, $zero, 255
-; MIPS64ELR6-NEXT: sllv $8, $2, $10
-; MIPS64ELR6-NEXT: nor $9, $zero, $8
-; MIPS64ELR6-NEXT: sllv $7, $1, $10
+; MIPS64ELR6-NEXT: sll $2, $2, 3
+; MIPS64ELR6-NEXT: ori $3, $zero, 255
+; MIPS64ELR6-NEXT: sllv $3, $3, $2
+; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB9_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $2, 0($6)
-; MIPS64ELR6-NEXT: and $2, $2, $8
-; MIPS64ELR6-NEXT: and $7, $7, $8
-; MIPS64ELR6-NEXT: slt $5, $2, $7
-; MIPS64ELR6-NEXT: selnez $3, $2, $5
-; MIPS64ELR6-NEXT: seleqz $5, $7, $5
-; MIPS64ELR6-NEXT: or $3, $3, $5
-; MIPS64ELR6-NEXT: and $3, $3, $8
-; MIPS64ELR6-NEXT: and $4, $2, $9
-; MIPS64ELR6-NEXT: or $4, $4, $3
-; MIPS64ELR6-NEXT: sc $4, 0($6)
-; MIPS64ELR6-NEXT: beqzc $4, .LBB9_1
+; MIPS64ELR6-NEXT: ll $7, 0($1)
+; MIPS64ELR6-NEXT: and $7, $7, $3
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $10, $7, $5
+; MIPS64ELR6-NEXT: selnez $8, $7, $10
+; MIPS64ELR6-NEXT: seleqz $10, $5, $10
+; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: and $8, $8, $3
+; MIPS64ELR6-NEXT: and $9, $7, $4
+; MIPS64ELR6-NEXT: or $9, $9, $8
+; MIPS64ELR6-NEXT: sc $9, 0($1)
+; MIPS64ELR6-NEXT: beqzc $9, .LBB9_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $1, $2, $8
-; MIPS64ELR6-NEXT: srlv $1, $1, $10
-; MIPS64ELR6-NEXT: seh $1, $1
+; MIPS64ELR6-NEXT: and $6, $7, $3
+; MIPS64ELR6-NEXT: srlv $6, $6, $2
+; MIPS64ELR6-NEXT: seh $6, $6
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
-; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sync
+; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16
; MIPS64ELR6-NEXT: jrc $ra
entry:
@@ -3716,38 +3716,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: addiu $sp, $sp, -8
; MIPS-NEXT: .cfi_def_cfa_offset 8
-; MIPS-NEXT: # kill: def $at killed $a1
+; MIPS-NEXT: move $1, $5
; MIPS-NEXT: sync
-; MIPS-NEXT: addiu $1, $zero, -4
-; MIPS-NEXT: and $6, $4, $1
-; MIPS-NEXT: andi $1, $4, 3
-; MIPS-NEXT: xori $1, $1, 3
-; MIPS-NEXT: sll $10, $1, 3
-; MIPS-NEXT: ori $1, $zero, 255
-; MIPS-NEXT: sllv $8, $1, $10
-; MIPS-NEXT: nor $9, $zero, $8
-; MIPS-NEXT: sllv $7, $5, $10
+; MIPS-NEXT: addiu $2, $zero, -4
+; MIPS-NEXT: and $2, $4, $2
+; MIPS-NEXT: andi $3, $4, 3
+; MIPS-NEXT: xori $3, $3, 3
+; MIPS-NEXT: sll $3, $3, 3
+; MIPS-NEXT: ori $4, $zero, 255
+; MIPS-NEXT: sllv $4, $4, $3
+; MIPS-NEXT: nor $6, $zero, $4
+; MIPS-NEXT: sllv $5, $5, $3
; MIPS-NEXT: $BB10_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS-NEXT: ll $2, 0($6)
-; MIPS-NEXT: sltu $5, $2, $7
-; MIPS-NEXT: move $3, $2
-; MIPS-NEXT: movn $3, $7, $5
-; MIPS-NEXT: and $3, $3, $8
-; MIPS-NEXT: and $4, $2, $9
-; MIPS-NEXT: or $4, $4, $3
-; MIPS-NEXT: sc $4, 0($6)
-; MIPS-NEXT: beqz $4, $BB10_1
+; MIPS-NEXT: ll $8, 0($2)
+; MIPS-NEXT: sltu $11, $8, $5
+; MIPS-NEXT: move $9, $8
+; MIPS-NEXT: movn $9, $5, $11
+; MIPS-NEXT: and $9, $9, $4
+; MIPS-NEXT: and $10, $8, $6
+; MIPS-NEXT: or $10, $10, $9
+; MIPS-NEXT: sc $10, 0($2)
+; MIPS-NEXT: beqz $10, $BB10_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
-; MIPS-NEXT: and $1, $2, $8
-; MIPS-NEXT: srlv $1, $1, $10
-; MIPS-NEXT: seh $1, $1
+; MIPS-NEXT: and $7, $8, $4
+; MIPS-NEXT: srlv $7, $7, $3
+; MIPS-NEXT: seh $7, $7
; MIPS-NEXT: # %bb.3: # %entry
-; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS-NEXT: # %bb.4: # %entry
-; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: sync
+; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: addiu $sp, $sp, 8
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
@@ -3756,38 +3756,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: addiu $sp, $sp, -8
; MIPSR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSR6-NEXT: # kill: def $at killed $a1
+; MIPSR6-NEXT: move $1, $5
; MIPSR6-NEXT: sync
-; MIPSR6-NEXT: addiu $1, $zero, -4
-; MIPSR6-NEXT: and $6, $4, $1
-; MIPSR6-NEXT: andi $1, $4, 3
-; MIPSR6-NEXT: xori $1, $1, 3
-; MIPSR6-NEXT: sll $10, $1, 3
-; MIPSR6-NEXT: ori $1, $zero, 255
-; MIPSR6-NEXT: sllv $8, $1, $10
-; MIPSR6-NEXT: nor $9, $zero, $8
-; MIPSR6-NEXT: sllv $7, $5, $10
+; MIPSR6-NEXT: addiu $2, $zero, -4
+; MIPSR6-NEXT: and $2, $4, $2
+; MIPSR6-NEXT: andi $3, $4, 3
+; MIPSR6-NEXT: xori $3, $3, 3
+; MIPSR6-NEXT: sll $3, $3, 3
+; MIPSR6-NEXT: ori $4, $zero, 255
+; MIPSR6-NEXT: sllv $4, $4, $3
+; MIPSR6-NEXT: nor $6, $zero, $4
+; MIPSR6-NEXT: sllv $5, $5, $3
; MIPSR6-NEXT: $BB10_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSR6-NEXT: ll $2, 0($6)
-; MIPSR6-NEXT: sltu $5, $2, $7
-; MIPSR6-NEXT: seleqz $3, $2, $5
-; MIPSR6-NEXT: selnez $5, $7, $5
-; MIPSR6-NEXT: or $3, $3, $5
-; MIPSR6-NEXT: and $3, $3, $8
-; MIPSR6-NEXT: and $4, $2, $9
-; MIPSR6-NEXT: or $4, $4, $3
-; MIPSR6-NEXT: sc $4, 0($6)
-; MIPSR6-NEXT: beqzc $4, $BB10_1
+; MIPSR6-NEXT: ll $8, 0($2)
+; MIPSR6-NEXT: sltu $11, $8, $5
+; MIPSR6-NEXT: seleqz $9, $8, $11
+; MIPSR6-NEXT: selnez $11, $5, $11
+; MIPSR6-NEXT: or $9, $9, $11
+; MIPSR6-NEXT: and $9, $9, $4
+; MIPSR6-NEXT: and $10, $8, $6
+; MIPSR6-NEXT: or $10, $10, $9
+; MIPSR6-NEXT: sc $10, 0($2)
+; MIPSR6-NEXT: beqzc $10, $BB10_1
; MIPSR6-NEXT: # %bb.2: # %entry
-; MIPSR6-NEXT: and $1, $2, $8
-; MIPSR6-NEXT: srlv $1, $1, $10
-; MIPSR6-NEXT: seh $1, $1
+; MIPSR6-NEXT: and $7, $8, $4
+; MIPSR6-NEXT: srlv $7, $7, $3
+; MIPSR6-NEXT: seh $7, $7
; MIPSR6-NEXT: # %bb.3: # %entry
-; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSR6-NEXT: # %bb.4: # %entry
-; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: sync
+; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: addiu $sp, $sp, 8
; MIPSR6-NEXT: jrc $ra
;
@@ -3795,37 +3795,37 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MM: # %bb.0: # %entry
; MM-NEXT: addiu $sp, $sp, -8
; MM-NEXT: .cfi_def_cfa_offset 8
-; MM-NEXT: # kill: def $at killed $a1
+; MM-NEXT: move $1, $5
; MM-NEXT: sync
-; MM-NEXT: addiu $1, $zero, -4
-; MM-NEXT: and $6, $4, $1
-; MM-NEXT: andi $1, $4, 3
-; MM-NEXT: xori $1, $1, 3
-; MM-NEXT: sll $10, $1, 3
-; MM-NEXT: ori $1, $zero, 255
-; MM-NEXT: sllv $8, $1, $10
-; MM-NEXT: nor $9, $zero, $8
-; MM-NEXT: sllv $7, $5, $10
+; MM-NEXT: addiu $2, $zero, -4
+; MM-NEXT: and $2, $4, $2
+; MM-NEXT: andi $3, $4, 3
+; MM-NEXT: xori $3, $3, 3
+; MM-NEXT: sll $3, $3, 3
+; MM-NEXT: ori $4, $zero, 255
+; MM-NEXT: sllv $4, $4, $3
+; MM-NEXT: nor $6, $zero, $4
+; MM-NEXT: sllv $5, $5, $3
; MM-NEXT: $BB10_1: # %entry
; MM-NEXT: # =>This Inner Loop Header: Depth=1
-; MM-NEXT: ll $2, 0($6)
-; MM-NEXT: sltu $5, $2, $7
-; MM-NEXT: or $3, $2, $zero
-; MM-NEXT: movn $3, $7, $5
-; MM-NEXT: and $3, $3, $8
-; MM-NEXT: and $4, $2, $9
-; MM-NEXT: or $4, $4, $3
-; MM-NEXT: sc $4, 0($6)
-; MM-NEXT: beqzc $4, $BB10_1
+; MM-NEXT: ll $8, 0($2)
+; MM-NEXT: sltu $11, $8, $5
+; MM-NEXT: or $9, $8, $zero
+; MM-NEXT: movn $9, $5, $11
+; MM-NEXT: and $9, $9, $4
+; MM-NEXT: and $10, $8, $6
+; MM-NEXT: or $10, $10, $9
+; MM-NEXT: sc $10, 0($2)
+; MM-NEXT: beqzc $10, $BB10_1
; MM-NEXT: # %bb.2: # %entry
-; MM-NEXT: and $1, $2, $8
-; MM-NEXT: srlv $1, $1, $10
-; MM-NEXT: seh $1, $1
+; MM-NEXT: and $7, $8, $4
+; MM-NEXT: srlv $7, $7, $3
+; MM-NEXT: seh $7, $7
; MM-NEXT: # %bb.3: # %entry
-; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MM-NEXT: # %bb.4: # %entry
-; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: sync
+; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: addiusp 8
; MM-NEXT: jrc $ra
;
@@ -3833,38 +3833,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: addiu $sp, $sp, -8
; MMR6-NEXT: .cfi_def_cfa_offset 8
-; MMR6-NEXT: # kill: def $at killed $a1
+; MMR6-NEXT: move $1, $5
; MMR6-NEXT: sync
-; MMR6-NEXT: addiu $1, $zero, -4
-; MMR6-NEXT: and $6, $4, $1
-; MMR6-NEXT: andi $1, $4, 3
-; MMR6-NEXT: xori $1, $1, 3
-; MMR6-NEXT: sll $10, $1, 3
-; MMR6-NEXT: ori $1, $zero, 255
-; MMR6-NEXT: sllv $8, $1, $10
-; MMR6-NEXT: nor $9, $zero, $8
-; MMR6-NEXT: sllv $7, $5, $10
+; MMR6-NEXT: addiu $2, $zero, -4
+; MMR6-NEXT: and $2, $4, $2
+; MMR6-NEXT: andi $3, $4, 3
+; MMR6-NEXT: xori $3, $3, 3
+; MMR6-NEXT: sll $3, $3, 3
+; MMR6-NEXT: ori $4, $zero, 255
+; MMR6-NEXT: sllv $4, $4, $3
+; MMR6-NEXT: nor $6, $zero, $4
+; MMR6-NEXT: sllv $5, $5, $3
; MMR6-NEXT: $BB10_1: # %entry
; MMR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMR6-NEXT: ll $2, 0($6)
-; MMR6-NEXT: sltu $5, $2, $7
-; MMR6-NEXT: seleqz $3, $2, $5
-; MMR6-NEXT: selnez $5, $7, $5
-; MMR6-NEXT: or $3, $3, $5
-; MMR6-NEXT: and $3, $3, $8
-; MMR6-NEXT: and $4, $2, $9
-; MMR6-NEXT: or $4, $4, $3
-; MMR6-NEXT: sc $4, 0($6)
-; MMR6-NEXT: beqc $4, $zero, $BB10_1
+; MMR6-NEXT: ll $8, 0($2)
+; MMR6-NEXT: sltu $11, $8, $5
+; MMR6-NEXT: seleqz $9, $8, $11
+; MMR6-NEXT: selnez $11, $5, $11
+; MMR6-NEXT: or $9, $9, $11
+; MMR6-NEXT: and $9, $9, $4
+; MMR6-NEXT: and $10, $8, $6
+; MMR6-NEXT: or $10, $10, $9
+; MMR6-NEXT: sc $10, 0($2)
+; MMR6-NEXT: beqc $10, $zero, $BB10_1
; MMR6-NEXT: # %bb.2: # %entry
-; MMR6-NEXT: and $1, $2, $8
-; MMR6-NEXT: srlv $1, $1, $10
-; MMR6-NEXT: seh $1, $1
+; MMR6-NEXT: and $7, $8, $4
+; MMR6-NEXT: srlv $7, $7, $3
+; MMR6-NEXT: seh $7, $7
; MMR6-NEXT: # %bb.3: # %entry
-; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMR6-NEXT: # %bb.4: # %entry
-; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: sync
+; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: addiu $sp, $sp, 8
; MMR6-NEXT: jrc $ra
;
@@ -3872,39 +3872,39 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSEL: # %bb.0: # %entry
; MIPSEL-NEXT: addiu $sp, $sp, -8
; MIPSEL-NEXT: .cfi_def_cfa_offset 8
-; MIPSEL-NEXT: # kill: def $at killed $a1
+; MIPSEL-NEXT: move $1, $5
; MIPSEL-NEXT: sync
-; MIPSEL-NEXT: addiu $1, $zero, -4
-; MIPSEL-NEXT: and $6, $4, $1
-; MIPSEL-NEXT: andi $1, $4, 3
-; MIPSEL-NEXT: sll $10, $1, 3
-; MIPSEL-NEXT: ori $1, $zero, 255
-; MIPSEL-NEXT: sllv $8, $1, $10
-; MIPSEL-NEXT: nor $9, $zero, $8
-; MIPSEL-NEXT: sllv $7, $5, $10
+; MIPSEL-NEXT: addiu $2, $zero, -4
+; MIPSEL-NEXT: and $2, $4, $2
+; MIPSEL-NEXT: andi $3, $4, 3
+; MIPSEL-NEXT: sll $3, $3, 3
+; MIPSEL-NEXT: ori $4, $zero, 255
+; MIPSEL-NEXT: sllv $4, $4, $3
+; MIPSEL-NEXT: nor $6, $zero, $4
+; MIPSEL-NEXT: sllv $5, $5, $3
; MIPSEL-NEXT: $BB10_1: # %entry
; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSEL-NEXT: ll $2, 0($6)
-; MIPSEL-NEXT: and $2, $2, $8
-; MIPSEL-NEXT: and $7, $7, $8
-; MIPSEL-NEXT: sltu $5, $2, $7
-; MIPSEL-NEXT: move $3, $2
-; MIPSEL-NEXT: movn $3, $7, $5
-; MIPSEL-NEXT: and $3, $3, $8
-; MIPSEL-NEXT: and $4, $2, $9
-; MIPSEL-NEXT: or $4, $4, $3
-; MIPSEL-NEXT: sc $4, 0($6)
-; MIPSEL-NEXT: beqz $4, $BB10_1
+; MIPSEL-NEXT: ll $8, 0($2)
+; MIPSEL-NEXT: and $8, $8, $4
+; MIPSEL-NEXT: and $5, $5, $4
+; MIPSEL-NEXT: sltu $11, $8, $5
+; MIPSEL-NEXT: move $9, $8
+; MIPSEL-NEXT: movn $9, $5, $11
+; MIPSEL-NEXT: and $9, $9, $4
+; MIPSEL-NEXT: and $10, $8, $6
+; MIPSEL-NEXT: or $10, $10, $9
+; MIPSEL-NEXT: sc $10, 0($2)
+; MIPSEL-NEXT: beqz $10, $BB10_1
; MIPSEL-NEXT: nop
; MIPSEL-NEXT: # %bb.2: # %entry
-; MIPSEL-NEXT: and $1, $2, $8
-; MIPSEL-NEXT: srlv $1, $1, $10
-; MIPSEL-NEXT: seh $1, $1
+; MIPSEL-NEXT: and $7, $8, $4
+; MIPSEL-NEXT: srlv $7, $7, $3
+; MIPSEL-NEXT: seh $7, $7
; MIPSEL-NEXT: # %bb.3: # %entry
-; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSEL-NEXT: # %bb.4: # %entry
-; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: sync
+; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: addiu $sp, $sp, 8
; MIPSEL-NEXT: jr $ra
; MIPSEL-NEXT: nop
@@ -3913,39 +3913,39 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSELR6: # %bb.0: # %entry
; MIPSELR6-NEXT: addiu $sp, $sp, -8
; MIPSELR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSELR6-NEXT: # kill: def $at killed $a1
+; MIPSELR6-NEXT: move $1, $5
; MIPSELR6-NEXT: sync
-; MIPSELR6-NEXT: addiu $1, $zero, -4
-; MIPSELR6-NEXT: and $6, $4, $1
-; MIPSELR6-NEXT: andi $1, $4, 3
-; MIPSELR6-NEXT: sll $10, $1, 3
-; MIPSELR6-NEXT: ori $1, $zero, 255
-; MIPSELR6-NEXT: sllv $8, $1, $10
-; MIPSELR6-NEXT: nor $9, $zero, $8
-; MIPSELR6-NEXT: sllv $7, $5, $10
+; MIPSELR6-NEXT: addiu $2, $zero, -4
+; MIPSELR6-NEXT: and $2, $4, $2
+; MIPSELR6-NEXT: andi $3, $4, 3
+; MIPSELR6-NEXT: sll $3, $3, 3
+; MIPSELR6-NEXT: ori $4, $zero, 255
+; MIPSELR6-NEXT: sllv $4, $4, $3
+; MIPSELR6-NEXT: nor $6, $zero, $4
+; MIPSELR6-NEXT: sllv $5, $5, $3
; MIPSELR6-NEXT: $BB10_1: # %entry
; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSELR6-NEXT: ll $2, 0($6)
-; MIPSELR6-NEXT: and $2, $2, $8
-; MIPSELR6-NEXT: and $7, $7, $8
-; MIPSELR6-NEXT: sltu $5, $2, $7
-; MIPSELR6-NEXT: seleqz $3, $2, $5
-; MIPSELR6-NEXT: selnez $5, $7, $5
-; MIPSELR6-NEXT: or $3, $3, $5
-; MIPSELR6-NEXT: and $3, $3, $8
-; MIPSELR6-NEXT: and $4, $2, $9
-; MIPSELR6-NEXT: or $4, $4, $3
-; MIPSELR6-NEXT: sc $4, 0($6)
-; MIPSELR6-NEXT: beqzc $4, $BB10_1
+; MIPSELR6-NEXT: ll $8, 0($2)
+; MIPSELR6-NEXT: and $8, $8, $4
+; MIPSELR6-NEXT: and $5, $5, $4
+; MIPSELR6-NEXT: sltu $11, $8, $5
+; MIPSELR6-NEXT: seleqz $9, $8, $11
+; MIPSELR6-NEXT: selnez $11, $5, $11
+; MIPSELR6-NEXT: or $9, $9, $11
+; MIPSELR6-NEXT: and $9, $9, $4
+; MIPSELR6-NEXT: and $10, $8, $6
+; MIPSELR6-NEXT: or $10, $10, $9
+; MIPSELR6-NEXT: sc $10, 0($2)
+; MIPSELR6-NEXT: beqzc $10, $BB10_1
; MIPSELR6-NEXT: # %bb.2: # %entry
-; MIPSELR6-NEXT: and $1, $2, $8
-; MIPSELR6-NEXT: srlv $1, $1, $10
-; MIPSELR6-NEXT: seh $1, $1
+; MIPSELR6-NEXT: and $7, $8, $4
+; MIPSELR6-NEXT: srlv $7, $7, $3
+; MIPSELR6-NEXT: seh $7, $7
; MIPSELR6-NEXT: # %bb.3: # %entry
-; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSELR6-NEXT: # %bb.4: # %entry
-; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: sync
+; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: addiu $sp, $sp, 8
; MIPSELR6-NEXT: jrc $ra
;
@@ -3953,38 +3953,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MMEL: # %bb.0: # %entry
; MMEL-NEXT: addiu $sp, $sp, -8
; MMEL-NEXT: .cfi_def_cfa_offset 8
-; MMEL-NEXT: # kill: def $at killed $a1
+; MMEL-NEXT: move $1, $5
; MMEL-NEXT: sync
-; MMEL-NEXT: addiu $1, $zero, -4
-; MMEL-NEXT: and $6, $4, $1
-; MMEL-NEXT: andi $1, $4, 3
-; MMEL-NEXT: sll $10, $1, 3
-; MMEL-NEXT: ori $1, $zero, 255
-; MMEL-NEXT: sllv $8, $1, $10
-; MMEL-NEXT: nor $9, $zero, $8
-; MMEL-NEXT: sllv $7, $5, $10
+; MMEL-NEXT: addiu $2, $zero, -4
+; MMEL-NEXT: and $2, $4, $2
+; MMEL-NEXT: andi $3, $4, 3
+; MMEL-NEXT: sll $3, $3, 3
+; MMEL-NEXT: ori $4, $zero, 255
+; MMEL-NEXT: sllv $4, $4, $3
+; MMEL-NEXT: nor $6, $zero, $4
+; MMEL-NEXT: sllv $5, $5, $3
; MMEL-NEXT: $BB10_1: # %entry
; MMEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MMEL-NEXT: ll $2, 0($6)
-; MMEL-NEXT: and $2, $2, $8
-; MMEL-NEXT: and $7, $7, $8
-; MMEL-NEXT: sltu $5, $2, $7
-; MMEL-NEXT: or $3, $2, $zero
-; MMEL-NEXT: movn $3, $7, $5
-; MMEL-NEXT: and $3, $3, $8
-; MMEL-NEXT: and $4, $2, $9
-; MMEL-NEXT: or $4, $4, $3
-; MMEL-NEXT: sc $4, 0($6)
-; MMEL-NEXT: beqzc $4, $BB10_1
+; MMEL-NEXT: ll $8, 0($2)
+; MMEL-NEXT: and $8, $8, $4
+; MMEL-NEXT: and $5, $5, $4
+; MMEL-NEXT: sltu $11, $8, $5
+; MMEL-NEXT: or $9, $8, $zero
+; MMEL-NEXT: movn $9, $5, $11
+; MMEL-NEXT: and $9, $9, $4
+; MMEL-NEXT: and $10, $8, $6
+; MMEL-NEXT: or $10, $10, $9
+; MMEL-NEXT: sc $10, 0($2)
+; MMEL-NEXT: beqzc $10, $BB10_1
; MMEL-NEXT: # %bb.2: # %entry
-; MMEL-NEXT: and $1, $2, $8
-; MMEL-NEXT: srlv $1, $1, $10
-; MMEL-NEXT: seh $1, $1
+; MMEL-NEXT: and $7, $8, $4
+; MMEL-NEXT: srlv $7, $7, $3
+; MMEL-NEXT: seh $7, $7
; MMEL-NEXT: # %bb.3: # %entry
-; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMEL-NEXT: # %bb.4: # %entry
-; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: sync
+; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: addiusp 8
; MMEL-NEXT: jrc $ra
;
@@ -3992,39 +3992,39 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MMELR6: # %bb.0: # %entry
; MMELR6-NEXT: addiu $sp, $sp, -8
; MMELR6-NEXT: .cfi_def_cfa_offset 8
-; MMELR6-NEXT: # kill: def $at killed $a1
+; MMELR6-NEXT: move $1, $5
; MMELR6-NEXT: sync
-; MMELR6-NEXT: addiu $1, $zero, -4
-; MMELR6-NEXT: and $6, $4, $1
-; MMELR6-NEXT: andi $1, $4, 3
-; MMELR6-NEXT: sll $10, $1, 3
-; MMELR6-NEXT: ori $1, $zero, 255
-; MMELR6-NEXT: sllv $8, $1, $10
-; MMELR6-NEXT: nor $9, $zero, $8
-; MMELR6-NEXT: sllv $7, $5, $10
+; MMELR6-NEXT: addiu $2, $zero, -4
+; MMELR6-NEXT: and $2, $4, $2
+; MMELR6-NEXT: andi $3, $4, 3
+; MMELR6-NEXT: sll $3, $3, 3
+; MMELR6-NEXT: ori $4, $zero, 255
+; MMELR6-NEXT: sllv $4, $4, $3
+; MMELR6-NEXT: nor $6, $zero, $4
+; MMELR6-NEXT: sllv $5, $5, $3
; MMELR6-NEXT: $BB10_1: # %entry
; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMELR6-NEXT: ll $2, 0($6)
-; MMELR6-NEXT: and $2, $2, $8
-; MMELR6-NEXT: and $7, $7, $8
-; MMELR6-NEXT: sltu $5, $2, $7
-; MMELR6-NEXT: seleqz $3, $2, $5
-; MMELR6-NEXT: selnez $5, $7, $5
-; MMELR6-NEXT: or $3, $3, $5
-; MMELR6-NEXT: and $3, $3, $8
-; MMELR6-NEXT: and $4, $2, $9
-; MMELR6-NEXT: or $4, $4, $3
-; MMELR6-NEXT: sc $4, 0($6)
-; MMELR6-NEXT: beqc $4, $zero, $BB10_1
+; MMELR6-NEXT: ll $8, 0($2)
+; MMELR6-NEXT: and $8, $8, $4
+; MMELR6-NEXT: and $5, $5, $4
+; MMELR6-NEXT: sltu $11, $8, $5
+; MMELR6-NEXT: seleqz $9, $8, $11
+; MMELR6-NEXT: selnez $11, $5, $11
+; MMELR6-NEXT: or $9, $9, $11
+; MMELR6-NEXT: and $9, $9, $4
+; MMELR6-NEXT: and $10, $8, $6
+; MMELR6-NEXT: or $10, $10, $9
+; MMELR6-NEXT: sc $10, 0($2)
+; MMELR6-NEXT: beqc $10, $zero, $BB10_1
; MMELR6-NEXT: # %bb.2: # %entry
-; MMELR6-NEXT: and $1, $2, $8
-; MMELR6-NEXT: srlv $1, $1, $10
-; MMELR6-NEXT: seh $1, $1
+; MMELR6-NEXT: and $7, $8, $4
+; MMELR6-NEXT: srlv $7, $7, $3
+; MMELR6-NEXT: seh $7, $7
; MMELR6-NEXT: # %bb.3: # %entry
-; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMELR6-NEXT: # %bb.4: # %entry
-; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: sync
+; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: addiu $sp, $sp, 8
; MMELR6-NEXT: jrc $ra
;
@@ -4032,38 +4032,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $sp, $sp, -16
; MIPS64-NEXT: .cfi_def_cfa_offset 16
-; MIPS64-NEXT: move $1, $5
+; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64-NEXT: sync
-; MIPS64-NEXT: daddiu $2, $zero, -4
-; MIPS64-NEXT: and $6, $4, $2
+; MIPS64-NEXT: daddiu $1, $zero, -4
+; MIPS64-NEXT: and $1, $4, $1
; MIPS64-NEXT: andi $2, $4, 3
; MIPS64-NEXT: xori $2, $2, 3
-; MIPS64-NEXT: sll $10, $2, 3
-; MIPS64-NEXT: ori $2, $zero, 255
-; MIPS64-NEXT: sllv $8, $2, $10
-; MIPS64-NEXT: nor $9, $zero, $8
-; MIPS64-NEXT: sllv $7, $1, $10
+; MIPS64-NEXT: sll $2, $2, 3
+; MIPS64-NEXT: ori $3, $zero, 255
+; MIPS64-NEXT: sllv $3, $3, $2
+; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB10_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $2, 0($6)
-; MIPS64-NEXT: sltu $5, $2, $7
-; MIPS64-NEXT: move $3, $2
-; MIPS64-NEXT: movn $3, $7, $5
-; MIPS64-NEXT: and $3, $3, $8
-; MIPS64-NEXT: and $4, $2, $9
-; MIPS64-NEXT: or $4, $4, $3
-; MIPS64-NEXT: sc $4, 0($6)
-; MIPS64-NEXT: beqz $4, .LBB10_1
+; MIPS64-NEXT: ll $7, 0($1)
+; MIPS64-NEXT: sltu $10, $7, $5
+; MIPS64-NEXT: move $8, $7
+; MIPS64-NEXT: movn $8, $5, $10
+; MIPS64-NEXT: and $8, $8, $3
+; MIPS64-NEXT: and $9, $7, $4
+; MIPS64-NEXT: or $9, $9, $8
+; MIPS64-NEXT: sc $9, 0($1)
+; MIPS64-NEXT: beqz $9, .LBB10_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $1, $2, $8
-; MIPS64-NEXT: srlv $1, $1, $10
-; MIPS64-NEXT: seh $1, $1
+; MIPS64-NEXT: and $6, $7, $3
+; MIPS64-NEXT: srlv $6, $6, $2
+; MIPS64-NEXT: seh $6, $6
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
-; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sync
+; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: daddiu $sp, $sp, 16
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
@@ -4072,38 +4072,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R6-NEXT: move $1, $5
+; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6-NEXT: sync
-; MIPS64R6-NEXT: daddiu $2, $zero, -4
-; MIPS64R6-NEXT: and $6, $4, $2
+; MIPS64R6-NEXT: daddiu $1, $zero, -4
+; MIPS64R6-NEXT: and $1, $4, $1
; MIPS64R6-NEXT: andi $2, $4, 3
; MIPS64R6-NEXT: xori $2, $2, 3
-; MIPS64R6-NEXT: sll $10, $2, 3
-; MIPS64R6-NEXT: ori $2, $zero, 255
-; MIPS64R6-NEXT: sllv $8, $2, $10
-; MIPS64R6-NEXT: nor $9, $zero, $8
-; MIPS64R6-NEXT: sllv $7, $1, $10
+; MIPS64R6-NEXT: sll $2, $2, 3
+; MIPS64R6-NEXT: ori $3, $zero, 255
+; MIPS64R6-NEXT: sllv $3, $3, $2
+; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB10_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $2, 0($6)
-; MIPS64R6-NEXT: sltu $5, $2, $7
-; MIPS64R6-NEXT: seleqz $3, $2, $5
-; MIPS64R6-NEXT: selnez $5, $7, $5
-; MIPS64R6-NEXT: or $3, $3, $5
-; MIPS64R6-NEXT: and $3, $3, $8
-; MIPS64R6-NEXT: and $4, $2, $9
-; MIPS64R6-NEXT: or $4, $4, $3
-; MIPS64R6-NEXT: sc $4, 0($6)
-; MIPS64R6-NEXT: beqzc $4, .LBB10_1
+; MIPS64R6-NEXT: ll $7, 0($1)
+; MIPS64R6-NEXT: sltu $10, $7, $5
+; MIPS64R6-NEXT: seleqz $8, $7, $10
+; MIPS64R6-NEXT: selnez $10, $5, $10
+; MIPS64R6-NEXT: or $8, $8, $10
+; MIPS64R6-NEXT: and $8, $8, $3
+; MIPS64R6-NEXT: and $9, $7, $4
+; MIPS64R6-NEXT: or $9, $9, $8
+; MIPS64R6-NEXT: sc $9, 0($1)
+; MIPS64R6-NEXT: beqzc $9, .LBB10_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $1, $2, $8
-; MIPS64R6-NEXT: srlv $1, $1, $10
-; MIPS64R6-NEXT: seh $1, $1
+; MIPS64R6-NEXT: and $6, $7, $3
+; MIPS64R6-NEXT: srlv $6, $6, $2
+; MIPS64R6-NEXT: seh $6, $6
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
-; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sync
+; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: daddiu $sp, $sp, 16
; MIPS64R6-NEXT: jrc $ra
;
@@ -4111,39 +4111,39 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64EL: # %bb.0: # %entry
; MIPS64EL-NEXT: daddiu $sp, $sp, -16
; MIPS64EL-NEXT: .cfi_def_cfa_offset 16
-; MIPS64EL-NEXT: move $1, $5
+; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64EL-NEXT: sync
-; MIPS64EL-NEXT: daddiu $2, $zero, -4
-; MIPS64EL-NEXT: and $6, $4, $2
+; MIPS64EL-NEXT: daddiu $1, $zero, -4
+; MIPS64EL-NEXT: and $1, $4, $1
; MIPS64EL-NEXT: andi $2, $4, 3
-; MIPS64EL-NEXT: sll $10, $2, 3
-; MIPS64EL-NEXT: ori $2, $zero, 255
-; MIPS64EL-NEXT: sllv $8, $2, $10
-; MIPS64EL-NEXT: nor $9, $zero, $8
-; MIPS64EL-NEXT: sllv $7, $1, $10
+; MIPS64EL-NEXT: sll $2, $2, 3
+; MIPS64EL-NEXT: ori $3, $zero, 255
+; MIPS64EL-NEXT: sllv $3, $3, $2
+; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB10_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $2, 0($6)
-; MIPS64EL-NEXT: and $2, $2, $8
-; MIPS64EL-NEXT: and $7, $7, $8
-; MIPS64EL-NEXT: sltu $5, $2, $7
-; MIPS64EL-NEXT: move $3, $2
-; MIPS64EL-NEXT: movn $3, $7, $5
-; MIPS64EL-NEXT: and $3, $3, $8
-; MIPS64EL-NEXT: and $4, $2, $9
-; MIPS64EL-NEXT: or $4, $4, $3
-; MIPS64EL-NEXT: sc $4, 0($6)
-; MIPS64EL-NEXT: beqz $4, .LBB10_1
+; MIPS64EL-NEXT: ll $7, 0($1)
+; MIPS64EL-NEXT: and $7, $7, $3
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $10, $7, $5
+; MIPS64EL-NEXT: move $8, $7
+; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: and $8, $8, $3
+; MIPS64EL-NEXT: and $9, $7, $4
+; MIPS64EL-NEXT: or $9, $9, $8
+; MIPS64EL-NEXT: sc $9, 0($1)
+; MIPS64EL-NEXT: beqz $9, .LBB10_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $1, $2, $8
-; MIPS64EL-NEXT: srlv $1, $1, $10
-; MIPS64EL-NEXT: seh $1, $1
+; MIPS64EL-NEXT: and $6, $7, $3
+; MIPS64EL-NEXT: srlv $6, $6, $2
+; MIPS64EL-NEXT: seh $6, $6
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
-; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sync
+; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
@@ -4152,39 +4152,39 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64ELR6: # %bb.0: # %entry
; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16
; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64ELR6-NEXT: move $1, $5
+; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64ELR6-NEXT: sync
-; MIPS64ELR6-NEXT: daddiu $2, $zero, -4
-; MIPS64ELR6-NEXT: and $6, $4, $2
+; MIPS64ELR6-NEXT: daddiu $1, $zero, -4
+; MIPS64ELR6-NEXT: and $1, $4, $1
; MIPS64ELR6-NEXT: andi $2, $4, 3
-; MIPS64ELR6-NEXT: sll $10, $2, 3
-; MIPS64ELR6-NEXT: ori $2, $zero, 255
-; MIPS64ELR6-NEXT: sllv $8, $2, $10
-; MIPS64ELR6-NEXT: nor $9, $zero, $8
-; MIPS64ELR6-NEXT: sllv $7, $1, $10
+; MIPS64ELR6-NEXT: sll $2, $2, 3
+; MIPS64ELR6-NEXT: ori $3, $zero, 255
+; MIPS64ELR6-NEXT: sllv $3, $3, $2
+; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB10_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $2, 0($6)
-; MIPS64ELR6-NEXT: and $2, $2, $8
-; MIPS64ELR6-NEXT: and $7, $7, $8
-; MIPS64ELR6-NEXT: sltu $5, $2, $7
-; MIPS64ELR6-NEXT: seleqz $3, $2, $5
-; MIPS64ELR6-NEXT: selnez $5, $7, $5
-; MIPS64ELR6-NEXT: or $3, $3, $5
-; MIPS64ELR6-NEXT: and $3, $3, $8
-; MIPS64ELR6-NEXT: and $4, $2, $9
-; MIPS64ELR6-NEXT: or $4, $4, $3
-; MIPS64ELR6-NEXT: sc $4, 0($6)
-; MIPS64ELR6-NEXT: beqzc $4, .LBB10_1
+; MIPS64ELR6-NEXT: ll $7, 0($1)
+; MIPS64ELR6-NEXT: and $7, $7, $3
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $10, $7, $5
+; MIPS64ELR6-NEXT: seleqz $8, $7, $10
+; MIPS64ELR6-NEXT: selnez $10, $5, $10
+; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: and $8, $8, $3
+; MIPS64ELR6-NEXT: and $9, $7, $4
+; MIPS64ELR6-NEXT: or $9, $9, $8
+; MIPS64ELR6-NEXT: sc $9, 0($1)
+; MIPS64ELR6-NEXT: beqzc $9, .LBB10_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $1, $2, $8
-; MIPS64ELR6-NEXT: srlv $1, $1, $10
-; MIPS64ELR6-NEXT: seh $1, $1
+; MIPS64ELR6-NEXT: and $6, $7, $3
+; MIPS64ELR6-NEXT: srlv $6, $6, $2
+; MIPS64ELR6-NEXT: seh $6, $6
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
-; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sync
+; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16
; MIPS64ELR6-NEXT: jrc $ra
entry:
@@ -4197,38 +4197,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: addiu $sp, $sp, -8
; MIPS-NEXT: .cfi_def_cfa_offset 8
-; MIPS-NEXT: # kill: def $at killed $a1
+; MIPS-NEXT: move $1, $5
; MIPS-NEXT: sync
-; MIPS-NEXT: addiu $1, $zero, -4
-; MIPS-NEXT: and $6, $4, $1
-; MIPS-NEXT: andi $1, $4, 3
-; MIPS-NEXT: xori $1, $1, 3
-; MIPS-NEXT: sll $10, $1, 3
-; MIPS-NEXT: ori $1, $zero, 255
-; MIPS-NEXT: sllv $8, $1, $10
-; MIPS-NEXT: nor $9, $zero, $8
-; MIPS-NEXT: sllv $7, $5, $10
+; MIPS-NEXT: addiu $2, $zero, -4
+; MIPS-NEXT: and $2, $4, $2
+; MIPS-NEXT: andi $3, $4, 3
+; MIPS-NEXT: xori $3, $3, 3
+; MIPS-NEXT: sll $3, $3, 3
+; MIPS-NEXT: ori $4, $zero, 255
+; MIPS-NEXT: sllv $4, $4, $3
+; MIPS-NEXT: nor $6, $zero, $4
+; MIPS-NEXT: sllv $5, $5, $3
; MIPS-NEXT: $BB11_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS-NEXT: ll $2, 0($6)
-; MIPS-NEXT: sltu $5, $2, $7
-; MIPS-NEXT: move $3, $2
-; MIPS-NEXT: movz $3, $7, $5
-; MIPS-NEXT: and $3, $3, $8
-; MIPS-NEXT: and $4, $2, $9
-; MIPS-NEXT: or $4, $4, $3
-; MIPS-NEXT: sc $4, 0($6)
-; MIPS-NEXT: beqz $4, $BB11_1
+; MIPS-NEXT: ll $8, 0($2)
+; MIPS-NEXT: sltu $11, $8, $5
+; MIPS-NEXT: move $9, $8
+; MIPS-NEXT: movz $9, $5, $11
+; MIPS-NEXT: and $9, $9, $4
+; MIPS-NEXT: and $10, $8, $6
+; MIPS-NEXT: or $10, $10, $9
+; MIPS-NEXT: sc $10, 0($2)
+; MIPS-NEXT: beqz $10, $BB11_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
-; MIPS-NEXT: and $1, $2, $8
-; MIPS-NEXT: srlv $1, $1, $10
-; MIPS-NEXT: seh $1, $1
+; MIPS-NEXT: and $7, $8, $4
+; MIPS-NEXT: srlv $7, $7, $3
+; MIPS-NEXT: seh $7, $7
; MIPS-NEXT: # %bb.3: # %entry
-; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS-NEXT: # %bb.4: # %entry
-; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: sync
+; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS-NEXT: addiu $sp, $sp, 8
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
@@ -4237,38 +4237,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: addiu $sp, $sp, -8
; MIPSR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSR6-NEXT: # kill: def $at killed $a1
+; MIPSR6-NEXT: move $1, $5
; MIPSR6-NEXT: sync
-; MIPSR6-NEXT: addiu $1, $zero, -4
-; MIPSR6-NEXT: and $6, $4, $1
-; MIPSR6-NEXT: andi $1, $4, 3
-; MIPSR6-NEXT: xori $1, $1, 3
-; MIPSR6-NEXT: sll $10, $1, 3
-; MIPSR6-NEXT: ori $1, $zero, 255
-; MIPSR6-NEXT: sllv $8, $1, $10
-; MIPSR6-NEXT: nor $9, $zero, $8
-; MIPSR6-NEXT: sllv $7, $5, $10
+; MIPSR6-NEXT: addiu $2, $zero, -4
+; MIPSR6-NEXT: and $2, $4, $2
+; MIPSR6-NEXT: andi $3, $4, 3
+; MIPSR6-NEXT: xori $3, $3, 3
+; MIPSR6-NEXT: sll $3, $3, 3
+; MIPSR6-NEXT: ori $4, $zero, 255
+; MIPSR6-NEXT: sllv $4, $4, $3
+; MIPSR6-NEXT: nor $6, $zero, $4
+; MIPSR6-NEXT: sllv $5, $5, $3
; MIPSR6-NEXT: $BB11_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSR6-NEXT: ll $2, 0($6)
-; MIPSR6-NEXT: sltu $5, $2, $7
-; MIPSR6-NEXT: selnez $3, $2, $5
-; MIPSR6-NEXT: seleqz $5, $7, $5
-; MIPSR6-NEXT: or $3, $3, $5
-; MIPSR6-NEXT: and $3, $3, $8
-; MIPSR6-NEXT: and $4, $2, $9
-; MIPSR6-NEXT: or $4, $4, $3
-; MIPSR6-NEXT: sc $4, 0($6)
-; MIPSR6-NEXT: beqzc $4, $BB11_1
+; MIPSR6-NEXT: ll $8, 0($2)
+; MIPSR6-NEXT: sltu $11, $8, $5
+; MIPSR6-NEXT: selnez $9, $8, $11
+; MIPSR6-NEXT: seleqz $11, $5, $11
+; MIPSR6-NEXT: or $9, $9, $11
+; MIPSR6-NEXT: and $9, $9, $4
+; MIPSR6-NEXT: and $10, $8, $6
+; MIPSR6-NEXT: or $10, $10, $9
+; MIPSR6-NEXT: sc $10, 0($2)
+; MIPSR6-NEXT: beqzc $10, $BB11_1
; MIPSR6-NEXT: # %bb.2: # %entry
-; MIPSR6-NEXT: and $1, $2, $8
-; MIPSR6-NEXT: srlv $1, $1, $10
-; MIPSR6-NEXT: seh $1, $1
+; MIPSR6-NEXT: and $7, $8, $4
+; MIPSR6-NEXT: srlv $7, $7, $3
+; MIPSR6-NEXT: seh $7, $7
; MIPSR6-NEXT: # %bb.3: # %entry
-; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSR6-NEXT: # %bb.4: # %entry
-; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: sync
+; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSR6-NEXT: addiu $sp, $sp, 8
; MIPSR6-NEXT: jrc $ra
;
@@ -4276,37 +4276,37 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MM: # %bb.0: # %entry
; MM-NEXT: addiu $sp, $sp, -8
; MM-NEXT: .cfi_def_cfa_offset 8
-; MM-NEXT: # kill: def $at killed $a1
+; MM-NEXT: move $1, $5
; MM-NEXT: sync
-; MM-NEXT: addiu $1, $zero, -4
-; MM-NEXT: and $6, $4, $1
-; MM-NEXT: andi $1, $4, 3
-; MM-NEXT: xori $1, $1, 3
-; MM-NEXT: sll $10, $1, 3
-; MM-NEXT: ori $1, $zero, 255
-; MM-NEXT: sllv $8, $1, $10
-; MM-NEXT: nor $9, $zero, $8
-; MM-NEXT: sllv $7, $5, $10
+; MM-NEXT: addiu $2, $zero, -4
+; MM-NEXT: and $2, $4, $2
+; MM-NEXT: andi $3, $4, 3
+; MM-NEXT: xori $3, $3, 3
+; MM-NEXT: sll $3, $3, 3
+; MM-NEXT: ori $4, $zero, 255
+; MM-NEXT: sllv $4, $4, $3
+; MM-NEXT: nor $6, $zero, $4
+; MM-NEXT: sllv $5, $5, $3
; MM-NEXT: $BB11_1: # %entry
; MM-NEXT: # =>This Inner Loop Header: Depth=1
-; MM-NEXT: ll $2, 0($6)
-; MM-NEXT: sltu $5, $2, $7
-; MM-NEXT: or $3, $2, $zero
-; MM-NEXT: movz $3, $7, $5
-; MM-NEXT: and $3, $3, $8
-; MM-NEXT: and $4, $2, $9
-; MM-NEXT: or $4, $4, $3
-; MM-NEXT: sc $4, 0($6)
-; MM-NEXT: beqzc $4, $BB11_1
+; MM-NEXT: ll $8, 0($2)
+; MM-NEXT: sltu $11, $8, $5
+; MM-NEXT: or $9, $8, $zero
+; MM-NEXT: movz $9, $5, $11
+; MM-NEXT: and $9, $9, $4
+; MM-NEXT: and $10, $8, $6
+; MM-NEXT: or $10, $10, $9
+; MM-NEXT: sc $10, 0($2)
+; MM-NEXT: beqzc $10, $BB11_1
; MM-NEXT: # %bb.2: # %entry
-; MM-NEXT: and $1, $2, $8
-; MM-NEXT: srlv $1, $1, $10
-; MM-NEXT: seh $1, $1
+; MM-NEXT: and $7, $8, $4
+; MM-NEXT: srlv $7, $7, $3
+; MM-NEXT: seh $7, $7
; MM-NEXT: # %bb.3: # %entry
-; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MM-NEXT: # %bb.4: # %entry
-; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: sync
+; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MM-NEXT: addiusp 8
; MM-NEXT: jrc $ra
;
@@ -4314,38 +4314,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: addiu $sp, $sp, -8
; MMR6-NEXT: .cfi_def_cfa_offset 8
-; MMR6-NEXT: # kill: def $at killed $a1
+; MMR6-NEXT: move $1, $5
; MMR6-NEXT: sync
-; MMR6-NEXT: addiu $1, $zero, -4
-; MMR6-NEXT: and $6, $4, $1
-; MMR6-NEXT: andi $1, $4, 3
-; MMR6-NEXT: xori $1, $1, 3
-; MMR6-NEXT: sll $10, $1, 3
-; MMR6-NEXT: ori $1, $zero, 255
-; MMR6-NEXT: sllv $8, $1, $10
-; MMR6-NEXT: nor $9, $zero, $8
-; MMR6-NEXT: sllv $7, $5, $10
+; MMR6-NEXT: addiu $2, $zero, -4
+; MMR6-NEXT: and $2, $4, $2
+; MMR6-NEXT: andi $3, $4, 3
+; MMR6-NEXT: xori $3, $3, 3
+; MMR6-NEXT: sll $3, $3, 3
+; MMR6-NEXT: ori $4, $zero, 255
+; MMR6-NEXT: sllv $4, $4, $3
+; MMR6-NEXT: nor $6, $zero, $4
+; MMR6-NEXT: sllv $5, $5, $3
; MMR6-NEXT: $BB11_1: # %entry
; MMR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMR6-NEXT: ll $2, 0($6)
-; MMR6-NEXT: sltu $5, $2, $7
-; MMR6-NEXT: selnez $3, $2, $5
-; MMR6-NEXT: seleqz $5, $7, $5
-; MMR6-NEXT: or $3, $3, $5
-; MMR6-NEXT: and $3, $3, $8
-; MMR6-NEXT: and $4, $2, $9
-; MMR6-NEXT: or $4, $4, $3
-; MMR6-NEXT: sc $4, 0($6)
-; MMR6-NEXT: beqc $4, $zero, $BB11_1
+; MMR6-NEXT: ll $8, 0($2)
+; MMR6-NEXT: sltu $11, $8, $5
+; MMR6-NEXT: selnez $9, $8, $11
+; MMR6-NEXT: seleqz $11, $5, $11
+; MMR6-NEXT: or $9, $9, $11
+; MMR6-NEXT: and $9, $9, $4
+; MMR6-NEXT: and $10, $8, $6
+; MMR6-NEXT: or $10, $10, $9
+; MMR6-NEXT: sc $10, 0($2)
+; MMR6-NEXT: beqc $10, $zero, $BB11_1
; MMR6-NEXT: # %bb.2: # %entry
-; MMR6-NEXT: and $1, $2, $8
-; MMR6-NEXT: srlv $1, $1, $10
-; MMR6-NEXT: seh $1, $1
+; MMR6-NEXT: and $7, $8, $4
+; MMR6-NEXT: srlv $7, $7, $3
+; MMR6-NEXT: seh $7, $7
; MMR6-NEXT: # %bb.3: # %entry
-; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMR6-NEXT: # %bb.4: # %entry
-; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: sync
+; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMR6-NEXT: addiu $sp, $sp, 8
; MMR6-NEXT: jrc $ra
;
@@ -4353,39 +4353,39 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSEL: # %bb.0: # %entry
; MIPSEL-NEXT: addiu $sp, $sp, -8
; MIPSEL-NEXT: .cfi_def_cfa_offset 8
-; MIPSEL-NEXT: # kill: def $at killed $a1
+; MIPSEL-NEXT: move $1, $5
; MIPSEL-NEXT: sync
-; MIPSEL-NEXT: addiu $1, $zero, -4
-; MIPSEL-NEXT: and $6, $4, $1
-; MIPSEL-NEXT: andi $1, $4, 3
-; MIPSEL-NEXT: sll $10, $1, 3
-; MIPSEL-NEXT: ori $1, $zero, 255
-; MIPSEL-NEXT: sllv $8, $1, $10
-; MIPSEL-NEXT: nor $9, $zero, $8
-; MIPSEL-NEXT: sllv $7, $5, $10
+; MIPSEL-NEXT: addiu $2, $zero, -4
+; MIPSEL-NEXT: and $2, $4, $2
+; MIPSEL-NEXT: andi $3, $4, 3
+; MIPSEL-NEXT: sll $3, $3, 3
+; MIPSEL-NEXT: ori $4, $zero, 255
+; MIPSEL-NEXT: sllv $4, $4, $3
+; MIPSEL-NEXT: nor $6, $zero, $4
+; MIPSEL-NEXT: sllv $5, $5, $3
; MIPSEL-NEXT: $BB11_1: # %entry
; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSEL-NEXT: ll $2, 0($6)
-; MIPSEL-NEXT: and $2, $2, $8
-; MIPSEL-NEXT: and $7, $7, $8
-; MIPSEL-NEXT: sltu $5, $2, $7
-; MIPSEL-NEXT: move $3, $2
-; MIPSEL-NEXT: movz $3, $7, $5
-; MIPSEL-NEXT: and $3, $3, $8
-; MIPSEL-NEXT: and $4, $2, $9
-; MIPSEL-NEXT: or $4, $4, $3
-; MIPSEL-NEXT: sc $4, 0($6)
-; MIPSEL-NEXT: beqz $4, $BB11_1
+; MIPSEL-NEXT: ll $8, 0($2)
+; MIPSEL-NEXT: and $8, $8, $4
+; MIPSEL-NEXT: and $5, $5, $4
+; MIPSEL-NEXT: sltu $11, $8, $5
+; MIPSEL-NEXT: move $9, $8
+; MIPSEL-NEXT: movz $9, $5, $11
+; MIPSEL-NEXT: and $9, $9, $4
+; MIPSEL-NEXT: and $10, $8, $6
+; MIPSEL-NEXT: or $10, $10, $9
+; MIPSEL-NEXT: sc $10, 0($2)
+; MIPSEL-NEXT: beqz $10, $BB11_1
; MIPSEL-NEXT: nop
; MIPSEL-NEXT: # %bb.2: # %entry
-; MIPSEL-NEXT: and $1, $2, $8
-; MIPSEL-NEXT: srlv $1, $1, $10
-; MIPSEL-NEXT: seh $1, $1
+; MIPSEL-NEXT: and $7, $8, $4
+; MIPSEL-NEXT: srlv $7, $7, $3
+; MIPSEL-NEXT: seh $7, $7
; MIPSEL-NEXT: # %bb.3: # %entry
-; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSEL-NEXT: # %bb.4: # %entry
-; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: sync
+; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSEL-NEXT: addiu $sp, $sp, 8
; MIPSEL-NEXT: jr $ra
; MIPSEL-NEXT: nop
@@ -4394,39 +4394,39 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPSELR6: # %bb.0: # %entry
; MIPSELR6-NEXT: addiu $sp, $sp, -8
; MIPSELR6-NEXT: .cfi_def_cfa_offset 8
-; MIPSELR6-NEXT: # kill: def $at killed $a1
+; MIPSELR6-NEXT: move $1, $5
; MIPSELR6-NEXT: sync
-; MIPSELR6-NEXT: addiu $1, $zero, -4
-; MIPSELR6-NEXT: and $6, $4, $1
-; MIPSELR6-NEXT: andi $1, $4, 3
-; MIPSELR6-NEXT: sll $10, $1, 3
-; MIPSELR6-NEXT: ori $1, $zero, 255
-; MIPSELR6-NEXT: sllv $8, $1, $10
-; MIPSELR6-NEXT: nor $9, $zero, $8
-; MIPSELR6-NEXT: sllv $7, $5, $10
+; MIPSELR6-NEXT: addiu $2, $zero, -4
+; MIPSELR6-NEXT: and $2, $4, $2
+; MIPSELR6-NEXT: andi $3, $4, 3
+; MIPSELR6-NEXT: sll $3, $3, 3
+; MIPSELR6-NEXT: ori $4, $zero, 255
+; MIPSELR6-NEXT: sllv $4, $4, $3
+; MIPSELR6-NEXT: nor $6, $zero, $4
+; MIPSELR6-NEXT: sllv $5, $5, $3
; MIPSELR6-NEXT: $BB11_1: # %entry
; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPSELR6-NEXT: ll $2, 0($6)
-; MIPSELR6-NEXT: and $2, $2, $8
-; MIPSELR6-NEXT: and $7, $7, $8
-; MIPSELR6-NEXT: sltu $5, $2, $7
-; MIPSELR6-NEXT: selnez $3, $2, $5
-; MIPSELR6-NEXT: seleqz $5, $7, $5
-; MIPSELR6-NEXT: or $3, $3, $5
-; MIPSELR6-NEXT: and $3, $3, $8
-; MIPSELR6-NEXT: and $4, $2, $9
-; MIPSELR6-NEXT: or $4, $4, $3
-; MIPSELR6-NEXT: sc $4, 0($6)
-; MIPSELR6-NEXT: beqzc $4, $BB11_1
+; MIPSELR6-NEXT: ll $8, 0($2)
+; MIPSELR6-NEXT: and $8, $8, $4
+; MIPSELR6-NEXT: and $5, $5, $4
+; MIPSELR6-NEXT: sltu $11, $8, $5
+; MIPSELR6-NEXT: selnez $9, $8, $11
+; MIPSELR6-NEXT: seleqz $11, $5, $11
+; MIPSELR6-NEXT: or $9, $9, $11
+; MIPSELR6-NEXT: and $9, $9, $4
+; MIPSELR6-NEXT: and $10, $8, $6
+; MIPSELR6-NEXT: or $10, $10, $9
+; MIPSELR6-NEXT: sc $10, 0($2)
+; MIPSELR6-NEXT: beqzc $10, $BB11_1
; MIPSELR6-NEXT: # %bb.2: # %entry
-; MIPSELR6-NEXT: and $1, $2, $8
-; MIPSELR6-NEXT: srlv $1, $1, $10
-; MIPSELR6-NEXT: seh $1, $1
+; MIPSELR6-NEXT: and $7, $8, $4
+; MIPSELR6-NEXT: srlv $7, $7, $3
+; MIPSELR6-NEXT: seh $7, $7
; MIPSELR6-NEXT: # %bb.3: # %entry
-; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPSELR6-NEXT: # %bb.4: # %entry
-; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: sync
+; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPSELR6-NEXT: addiu $sp, $sp, 8
; MIPSELR6-NEXT: jrc $ra
;
@@ -4434,38 +4434,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MMEL: # %bb.0: # %entry
; MMEL-NEXT: addiu $sp, $sp, -8
; MMEL-NEXT: .cfi_def_cfa_offset 8
-; MMEL-NEXT: # kill: def $at killed $a1
+; MMEL-NEXT: move $1, $5
; MMEL-NEXT: sync
-; MMEL-NEXT: addiu $1, $zero, -4
-; MMEL-NEXT: and $6, $4, $1
-; MMEL-NEXT: andi $1, $4, 3
-; MMEL-NEXT: sll $10, $1, 3
-; MMEL-NEXT: ori $1, $zero, 255
-; MMEL-NEXT: sllv $8, $1, $10
-; MMEL-NEXT: nor $9, $zero, $8
-; MMEL-NEXT: sllv $7, $5, $10
+; MMEL-NEXT: addiu $2, $zero, -4
+; MMEL-NEXT: and $2, $4, $2
+; MMEL-NEXT: andi $3, $4, 3
+; MMEL-NEXT: sll $3, $3, 3
+; MMEL-NEXT: ori $4, $zero, 255
+; MMEL-NEXT: sllv $4, $4, $3
+; MMEL-NEXT: nor $6, $zero, $4
+; MMEL-NEXT: sllv $5, $5, $3
; MMEL-NEXT: $BB11_1: # %entry
; MMEL-NEXT: # =>This Inner Loop Header: Depth=1
-; MMEL-NEXT: ll $2, 0($6)
-; MMEL-NEXT: and $2, $2, $8
-; MMEL-NEXT: and $7, $7, $8
-; MMEL-NEXT: sltu $5, $2, $7
-; MMEL-NEXT: or $3, $2, $zero
-; MMEL-NEXT: movz $3, $7, $5
-; MMEL-NEXT: and $3, $3, $8
-; MMEL-NEXT: and $4, $2, $9
-; MMEL-NEXT: or $4, $4, $3
-; MMEL-NEXT: sc $4, 0($6)
-; MMEL-NEXT: beqzc $4, $BB11_1
+; MMEL-NEXT: ll $8, 0($2)
+; MMEL-NEXT: and $8, $8, $4
+; MMEL-NEXT: and $5, $5, $4
+; MMEL-NEXT: sltu $11, $8, $5
+; MMEL-NEXT: or $9, $8, $zero
+; MMEL-NEXT: movz $9, $5, $11
+; MMEL-NEXT: and $9, $9, $4
+; MMEL-NEXT: and $10, $8, $6
+; MMEL-NEXT: or $10, $10, $9
+; MMEL-NEXT: sc $10, 0($2)
+; MMEL-NEXT: beqzc $10, $BB11_1
; MMEL-NEXT: # %bb.2: # %entry
-; MMEL-NEXT: and $1, $2, $8
-; MMEL-NEXT: srlv $1, $1, $10
-; MMEL-NEXT: seh $1, $1
+; MMEL-NEXT: and $7, $8, $4
+; MMEL-NEXT: srlv $7, $7, $3
+; MMEL-NEXT: seh $7, $7
; MMEL-NEXT: # %bb.3: # %entry
-; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMEL-NEXT: # %bb.4: # %entry
-; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: sync
+; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMEL-NEXT: addiusp 8
; MMEL-NEXT: jrc $ra
;
@@ -4473,39 +4473,39 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MMELR6: # %bb.0: # %entry
; MMELR6-NEXT: addiu $sp, $sp, -8
; MMELR6-NEXT: .cfi_def_cfa_offset 8
-; MMELR6-NEXT: # kill: def $at killed $a1
+; MMELR6-NEXT: move $1, $5
; MMELR6-NEXT: sync
-; MMELR6-NEXT: addiu $1, $zero, -4
-; MMELR6-NEXT: and $6, $4, $1
-; MMELR6-NEXT: andi $1, $4, 3
-; MMELR6-NEXT: sll $10, $1, 3
-; MMELR6-NEXT: ori $1, $zero, 255
-; MMELR6-NEXT: sllv $8, $1, $10
-; MMELR6-NEXT: nor $9, $zero, $8
-; MMELR6-NEXT: sllv $7, $5, $10
+; MMELR6-NEXT: addiu $2, $zero, -4
+; MMELR6-NEXT: and $2, $4, $2
+; MMELR6-NEXT: andi $3, $4, 3
+; MMELR6-NEXT: sll $3, $3, 3
+; MMELR6-NEXT: ori $4, $zero, 255
+; MMELR6-NEXT: sllv $4, $4, $3
+; MMELR6-NEXT: nor $6, $zero, $4
+; MMELR6-NEXT: sllv $5, $5, $3
; MMELR6-NEXT: $BB11_1: # %entry
; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MMELR6-NEXT: ll $2, 0($6)
-; MMELR6-NEXT: and $2, $2, $8
-; MMELR6-NEXT: and $7, $7, $8
-; MMELR6-NEXT: sltu $5, $2, $7
-; MMELR6-NEXT: selnez $3, $2, $5
-; MMELR6-NEXT: seleqz $5, $7, $5
-; MMELR6-NEXT: or $3, $3, $5
-; MMELR6-NEXT: and $3, $3, $8
-; MMELR6-NEXT: and $4, $2, $9
-; MMELR6-NEXT: or $4, $4, $3
-; MMELR6-NEXT: sc $4, 0($6)
-; MMELR6-NEXT: beqc $4, $zero, $BB11_1
+; MMELR6-NEXT: ll $8, 0($2)
+; MMELR6-NEXT: and $8, $8, $4
+; MMELR6-NEXT: and $5, $5, $4
+; MMELR6-NEXT: sltu $11, $8, $5
+; MMELR6-NEXT: selnez $9, $8, $11
+; MMELR6-NEXT: seleqz $11, $5, $11
+; MMELR6-NEXT: or $9, $9, $11
+; MMELR6-NEXT: and $9, $9, $4
+; MMELR6-NEXT: and $10, $8, $6
+; MMELR6-NEXT: or $10, $10, $9
+; MMELR6-NEXT: sc $10, 0($2)
+; MMELR6-NEXT: beqc $10, $zero, $BB11_1
; MMELR6-NEXT: # %bb.2: # %entry
-; MMELR6-NEXT: and $1, $2, $8
-; MMELR6-NEXT: srlv $1, $1, $10
-; MMELR6-NEXT: seh $1, $1
+; MMELR6-NEXT: and $7, $8, $4
+; MMELR6-NEXT: srlv $7, $7, $3
+; MMELR6-NEXT: seh $7, $7
; MMELR6-NEXT: # %bb.3: # %entry
-; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MMELR6-NEXT: # %bb.4: # %entry
-; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: sync
+; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MMELR6-NEXT: addiu $sp, $sp, 8
; MMELR6-NEXT: jrc $ra
;
@@ -4513,38 +4513,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $sp, $sp, -16
; MIPS64-NEXT: .cfi_def_cfa_offset 16
-; MIPS64-NEXT: move $1, $5
+; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64-NEXT: sync
-; MIPS64-NEXT: daddiu $2, $zero, -4
-; MIPS64-NEXT: and $6, $4, $2
+; MIPS64-NEXT: daddiu $1, $zero, -4
+; MIPS64-NEXT: and $1, $4, $1
; MIPS64-NEXT: andi $2, $4, 3
; MIPS64-NEXT: xori $2, $2, 3
-; MIPS64-NEXT: sll $10, $2, 3
-; MIPS64-NEXT: ori $2, $zero, 255
-; MIPS64-NEXT: sllv $8, $2, $10
-; MIPS64-NEXT: nor $9, $zero, $8
-; MIPS64-NEXT: sllv $7, $1, $10
+; MIPS64-NEXT: sll $2, $2, 3
+; MIPS64-NEXT: ori $3, $zero, 255
+; MIPS64-NEXT: sllv $3, $3, $2
+; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB11_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $2, 0($6)
-; MIPS64-NEXT: sltu $5, $2, $7
-; MIPS64-NEXT: move $3, $2
-; MIPS64-NEXT: movz $3, $7, $5
-; MIPS64-NEXT: and $3, $3, $8
-; MIPS64-NEXT: and $4, $2, $9
-; MIPS64-NEXT: or $4, $4, $3
-; MIPS64-NEXT: sc $4, 0($6)
-; MIPS64-NEXT: beqz $4, .LBB11_1
+; MIPS64-NEXT: ll $7, 0($1)
+; MIPS64-NEXT: sltu $10, $7, $5
+; MIPS64-NEXT: move $8, $7
+; MIPS64-NEXT: movz $8, $5, $10
+; MIPS64-NEXT: and $8, $8, $3
+; MIPS64-NEXT: and $9, $7, $4
+; MIPS64-NEXT: or $9, $9, $8
+; MIPS64-NEXT: sc $9, 0($1)
+; MIPS64-NEXT: beqz $9, .LBB11_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $1, $2, $8
-; MIPS64-NEXT: srlv $1, $1, $10
-; MIPS64-NEXT: seh $1, $1
+; MIPS64-NEXT: and $6, $7, $3
+; MIPS64-NEXT: srlv $6, $6, $2
+; MIPS64-NEXT: seh $6, $6
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
-; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sync
+; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: daddiu $sp, $sp, 16
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
@@ -4553,38 +4553,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R6-NEXT: move $1, $5
+; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6-NEXT: sync
-; MIPS64R6-NEXT: daddiu $2, $zero, -4
-; MIPS64R6-NEXT: and $6, $4, $2
+; MIPS64R6-NEXT: daddiu $1, $zero, -4
+; MIPS64R6-NEXT: and $1, $4, $1
; MIPS64R6-NEXT: andi $2, $4, 3
; MIPS64R6-NEXT: xori $2, $2, 3
-; MIPS64R6-NEXT: sll $10, $2, 3
-; MIPS64R6-NEXT: ori $2, $zero, 255
-; MIPS64R6-NEXT: sllv $8, $2, $10
-; MIPS64R6-NEXT: nor $9, $zero, $8
-; MIPS64R6-NEXT: sllv $7, $1, $10
+; MIPS64R6-NEXT: sll $2, $2, 3
+; MIPS64R6-NEXT: ori $3, $zero, 255
+; MIPS64R6-NEXT: sllv $3, $3, $2
+; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB11_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $2, 0($6)
-; MIPS64R6-NEXT: sltu $5, $2, $7
-; MIPS64R6-NEXT: selnez $3, $2, $5
-; MIPS64R6-NEXT: seleqz $5, $7, $5
-; MIPS64R6-NEXT: or $3, $3, $5
-; MIPS64R6-NEXT: and $3, $3, $8
-; MIPS64R6-NEXT: and $4, $2, $9
-; MIPS64R6-NEXT: or $4, $4, $3
-; MIPS64R6-NEXT: sc $4, 0($6)
-; MIPS64R6-NEXT: beqzc $4, .LBB11_1
+; MIPS64R6-NEXT: ll $7, 0($1)
+; MIPS64R6-NEXT: sltu $10, $7, $5
+; MIPS64R6-NEXT: selnez $8, $7, $10
+; MIPS64R6-NEXT: seleqz $10, $5, $10
+; MIPS64R6-NEXT: or $8, $8, $10
+; MIPS64R6-NEXT: and $8, $8, $3
+; MIPS64R6-NEXT: and $9, $7, $4
+; MIPS64R6-NEXT: or $9, $9, $8
+; MIPS64R6-NEXT: sc $9, 0($1)
+; MIPS64R6-NEXT: beqzc $9, .LBB11_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $1, $2, $8
-; MIPS64R6-NEXT: srlv $1, $1, $10
-; MIPS64R6-NEXT: seh $1, $1
+; MIPS64R6-NEXT: and $6, $7, $3
+; MIPS64R6-NEXT: srlv $6, $6, $2
+; MIPS64R6-NEXT: seh $6, $6
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
-; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sync
+; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: daddiu $sp, $sp, 16
; MIPS64R6-NEXT: jrc $ra
;
@@ -4592,39 +4592,39 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64EL: # %bb.0: # %entry
; MIPS64EL-NEXT: daddiu $sp, $sp, -16
; MIPS64EL-NEXT: .cfi_def_cfa_offset 16
-; MIPS64EL-NEXT: move $1, $5
+; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64EL-NEXT: sync
-; MIPS64EL-NEXT: daddiu $2, $zero, -4
-; MIPS64EL-NEXT: and $6, $4, $2
+; MIPS64EL-NEXT: daddiu $1, $zero, -4
+; MIPS64EL-NEXT: and $1, $4, $1
; MIPS64EL-NEXT: andi $2, $4, 3
-; MIPS64EL-NEXT: sll $10, $2, 3
-; MIPS64EL-NEXT: ori $2, $zero, 255
-; MIPS64EL-NEXT: sllv $8, $2, $10
-; MIPS64EL-NEXT: nor $9, $zero, $8
-; MIPS64EL-NEXT: sllv $7, $1, $10
+; MIPS64EL-NEXT: sll $2, $2, 3
+; MIPS64EL-NEXT: ori $3, $zero, 255
+; MIPS64EL-NEXT: sllv $3, $3, $2
+; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB11_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $2, 0($6)
-; MIPS64EL-NEXT: and $2, $2, $8
-; MIPS64EL-NEXT: and $7, $7, $8
-; MIPS64EL-NEXT: sltu $5, $2, $7
-; MIPS64EL-NEXT: move $3, $2
-; MIPS64EL-NEXT: movz $3, $7, $5
-; MIPS64EL-NEXT: and $3, $3, $8
-; MIPS64EL-NEXT: and $4, $2, $9
-; MIPS64EL-NEXT: or $4, $4, $3
-; MIPS64EL-NEXT: sc $4, 0($6)
-; MIPS64EL-NEXT: beqz $4, .LBB11_1
+; MIPS64EL-NEXT: ll $7, 0($1)
+; MIPS64EL-NEXT: and $7, $7, $3
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $10, $7, $5
+; MIPS64EL-NEXT: move $8, $7
+; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: and $8, $8, $3
+; MIPS64EL-NEXT: and $9, $7, $4
+; MIPS64EL-NEXT: or $9, $9, $8
+; MIPS64EL-NEXT: sc $9, 0($1)
+; MIPS64EL-NEXT: beqz $9, .LBB11_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $1, $2, $8
-; MIPS64EL-NEXT: srlv $1, $1, $10
-; MIPS64EL-NEXT: seh $1, $1
+; MIPS64EL-NEXT: and $6, $7, $3
+; MIPS64EL-NEXT: srlv $6, $6, $2
+; MIPS64EL-NEXT: seh $6, $6
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
-; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sync
+; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
@@ -4633,39 +4633,39 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64ELR6: # %bb.0: # %entry
; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16
; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64ELR6-NEXT: move $1, $5
+; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64ELR6-NEXT: sync
-; MIPS64ELR6-NEXT: daddiu $2, $zero, -4
-; MIPS64ELR6-NEXT: and $6, $4, $2
+; MIPS64ELR6-NEXT: daddiu $1, $zero, -4
+; MIPS64ELR6-NEXT: and $1, $4, $1
; MIPS64ELR6-NEXT: andi $2, $4, 3
-; MIPS64ELR6-NEXT: sll $10, $2, 3
-; MIPS64ELR6-NEXT: ori $2, $zero, 255
-; MIPS64ELR6-NEXT: sllv $8, $2, $10
-; MIPS64ELR6-NEXT: nor $9, $zero, $8
-; MIPS64ELR6-NEXT: sllv $7, $1, $10
+; MIPS64ELR6-NEXT: sll $2, $2, 3
+; MIPS64ELR6-NEXT: ori $3, $zero, 255
+; MIPS64ELR6-NEXT: sllv $3, $3, $2
+; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB11_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $2, 0($6)
-; MIPS64ELR6-NEXT: and $2, $2, $8
-; MIPS64ELR6-NEXT: and $7, $7, $8
-; MIPS64ELR6-NEXT: sltu $5, $2, $7
-; MIPS64ELR6-NEXT: selnez $3, $2, $5
-; MIPS64ELR6-NEXT: seleqz $5, $7, $5
-; MIPS64ELR6-NEXT: or $3, $3, $5
-; MIPS64ELR6-NEXT: and $3, $3, $8
-; MIPS64ELR6-NEXT: and $4, $2, $9
-; MIPS64ELR6-NEXT: or $4, $4, $3
-; MIPS64ELR6-NEXT: sc $4, 0($6)
-; MIPS64ELR6-NEXT: beqzc $4, .LBB11_1
+; MIPS64ELR6-NEXT: ll $7, 0($1)
+; MIPS64ELR6-NEXT: and $7, $7, $3
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $10, $7, $5
+; MIPS64ELR6-NEXT: selnez $8, $7, $10
+; MIPS64ELR6-NEXT: seleqz $10, $5, $10
+; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: and $8, $8, $3
+; MIPS64ELR6-NEXT: and $9, $7, $4
+; MIPS64ELR6-NEXT: or $9, $9, $8
+; MIPS64ELR6-NEXT: sc $9, 0($1)
+; MIPS64ELR6-NEXT: beqzc $9, .LBB11_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $1, $2, $8
-; MIPS64ELR6-NEXT: srlv $1, $1, $10
-; MIPS64ELR6-NEXT: seh $1, $1
+; MIPS64ELR6-NEXT: and $6, $7, $3
+; MIPS64ELR6-NEXT: srlv $6, $6, $2
+; MIPS64ELR6-NEXT: seh $6, $6
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
-; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sync
+; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16
; MIPS64ELR6-NEXT: jrc $ra
entry:
diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll
index d7cfde7f1c04..59ff83e4969c 100644
--- a/llvm/test/CodeGen/Mips/atomic.ll
+++ b/llvm/test/CodeGen/Mips/atomic.ll
@@ -57,13 +57,13 @@ define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind {
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
-; MIPS32O0-NEXT: lw $3, %got(x)($1)
+; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB0_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($3)
-; MIPS32O0-NEXT: addu $1, $2, $4
-; MIPS32O0-NEXT: sc $1, 0($3)
-; MIPS32O0-NEXT: beqz $1, $BB0_1
+; MIPS32O0-NEXT: ll $2, 0($1)
+; MIPS32O0-NEXT: addu $3, $2, $4
+; MIPS32O0-NEXT: sc $3, 0($1)
+; MIPS32O0-NEXT: beqz $3, $BB0_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
@@ -107,13 +107,13 @@ define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind {
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: lw $3, %got(x)($1)
+; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB0_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($3)
-; MIPS32R6O0-NEXT: addu $1, $2, $4
-; MIPS32R6O0-NEXT: sc $1, 0($3)
-; MIPS32R6O0-NEXT: beqzc $1, $BB0_1
+; MIPS32R6O0-NEXT: ll $2, 0($1)
+; MIPS32R6O0-NEXT: addu $3, $2, $4
+; MIPS32R6O0-NEXT: sc $3, 0($1)
+; MIPS32R6O0-NEXT: beqzc $3, $BB0_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
@@ -191,13 +191,13 @@ define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB0_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($3)
-; MIPS64R6O0-NEXT: addu $1, $2, $4
-; MIPS64R6O0-NEXT: sc $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1
+; MIPS64R6O0-NEXT: ll $2, 0($1)
+; MIPS64R6O0-NEXT: addu $3, $2, $4
+; MIPS64R6O0-NEXT: sc $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB0_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -313,13 +313,13 @@ define i32 @AtomicLoadSub32(i32 signext %incr) nounwind {
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
-; MIPS32O0-NEXT: lw $3, %got(x)($1)
+; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB1_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($3)
-; MIPS32O0-NEXT: subu $1, $2, $4
-; MIPS32O0-NEXT: sc $1, 0($3)
-; MIPS32O0-NEXT: beqz $1, $BB1_1
+; MIPS32O0-NEXT: ll $2, 0($1)
+; MIPS32O0-NEXT: subu $3, $2, $4
+; MIPS32O0-NEXT: sc $3, 0($1)
+; MIPS32O0-NEXT: beqz $3, $BB1_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
@@ -363,13 +363,13 @@ define i32 @AtomicLoadSub32(i32 signext %incr) nounwind {
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: lw $3, %got(x)($1)
+; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB1_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($3)
-; MIPS32R6O0-NEXT: subu $1, $2, $4
-; MIPS32R6O0-NEXT: sc $1, 0($3)
-; MIPS32R6O0-NEXT: beqzc $1, $BB1_1
+; MIPS32R6O0-NEXT: ll $2, 0($1)
+; MIPS32R6O0-NEXT: subu $3, $2, $4
+; MIPS32R6O0-NEXT: sc $3, 0($1)
+; MIPS32R6O0-NEXT: beqzc $3, $BB1_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
@@ -447,13 +447,13 @@ define i32 @AtomicLoadSub32(i32 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB1_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($3)
-; MIPS64R6O0-NEXT: subu $1, $2, $4
-; MIPS64R6O0-NEXT: sc $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1
+; MIPS64R6O0-NEXT: ll $2, 0($1)
+; MIPS64R6O0-NEXT: subu $3, $2, $4
+; MIPS64R6O0-NEXT: sc $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB1_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -569,13 +569,13 @@ define i32 @AtomicLoadXor32(i32 signext %incr) nounwind {
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
-; MIPS32O0-NEXT: lw $3, %got(x)($1)
+; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB2_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($3)
-; MIPS32O0-NEXT: xor $1, $2, $4
-; MIPS32O0-NEXT: sc $1, 0($3)
-; MIPS32O0-NEXT: beqz $1, $BB2_1
+; MIPS32O0-NEXT: ll $2, 0($1)
+; MIPS32O0-NEXT: xor $3, $2, $4
+; MIPS32O0-NEXT: sc $3, 0($1)
+; MIPS32O0-NEXT: beqz $3, $BB2_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
@@ -619,13 +619,13 @@ define i32 @AtomicLoadXor32(i32 signext %incr) nounwind {
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: lw $3, %got(x)($1)
+; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB2_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($3)
-; MIPS32R6O0-NEXT: xor $1, $2, $4
-; MIPS32R6O0-NEXT: sc $1, 0($3)
-; MIPS32R6O0-NEXT: beqzc $1, $BB2_1
+; MIPS32R6O0-NEXT: ll $2, 0($1)
+; MIPS32R6O0-NEXT: xor $3, $2, $4
+; MIPS32R6O0-NEXT: sc $3, 0($1)
+; MIPS32R6O0-NEXT: beqzc $3, $BB2_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
@@ -703,13 +703,13 @@ define i32 @AtomicLoadXor32(i32 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB2_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($3)
-; MIPS64R6O0-NEXT: xor $1, $2, $4
-; MIPS64R6O0-NEXT: sc $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1
+; MIPS64R6O0-NEXT: ll $2, 0($1)
+; MIPS64R6O0-NEXT: xor $3, $2, $4
+; MIPS64R6O0-NEXT: sc $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB2_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -824,13 +824,13 @@ define i32 @AtomicLoadOr32(i32 signext %incr) nounwind {
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
-; MIPS32O0-NEXT: lw $3, %got(x)($1)
+; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB3_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($3)
-; MIPS32O0-NEXT: or $1, $2, $4
-; MIPS32O0-NEXT: sc $1, 0($3)
-; MIPS32O0-NEXT: beqz $1, $BB3_1
+; MIPS32O0-NEXT: ll $2, 0($1)
+; MIPS32O0-NEXT: or $3, $2, $4
+; MIPS32O0-NEXT: sc $3, 0($1)
+; MIPS32O0-NEXT: beqz $3, $BB3_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
@@ -874,13 +874,13 @@ define i32 @AtomicLoadOr32(i32 signext %incr) nounwind {
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: lw $3, %got(x)($1)
+; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB3_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($3)
-; MIPS32R6O0-NEXT: or $1, $2, $4
-; MIPS32R6O0-NEXT: sc $1, 0($3)
-; MIPS32R6O0-NEXT: beqzc $1, $BB3_1
+; MIPS32R6O0-NEXT: ll $2, 0($1)
+; MIPS32R6O0-NEXT: or $3, $2, $4
+; MIPS32R6O0-NEXT: sc $3, 0($1)
+; MIPS32R6O0-NEXT: beqzc $3, $BB3_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
@@ -958,13 +958,13 @@ define i32 @AtomicLoadOr32(i32 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB3_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($3)
-; MIPS64R6O0-NEXT: or $1, $2, $4
-; MIPS64R6O0-NEXT: sc $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1
+; MIPS64R6O0-NEXT: ll $2, 0($1)
+; MIPS64R6O0-NEXT: or $3, $2, $4
+; MIPS64R6O0-NEXT: sc $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB3_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -1079,13 +1079,13 @@ define i32 @AtomicLoadAnd32(i32 signext %incr) nounwind {
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
-; MIPS32O0-NEXT: lw $3, %got(x)($1)
+; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB4_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($3)
-; MIPS32O0-NEXT: and $1, $2, $4
-; MIPS32O0-NEXT: sc $1, 0($3)
-; MIPS32O0-NEXT: beqz $1, $BB4_1
+; MIPS32O0-NEXT: ll $2, 0($1)
+; MIPS32O0-NEXT: and $3, $2, $4
+; MIPS32O0-NEXT: sc $3, 0($1)
+; MIPS32O0-NEXT: beqz $3, $BB4_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
@@ -1129,13 +1129,13 @@ define i32 @AtomicLoadAnd32(i32 signext %incr) nounwind {
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: lw $3, %got(x)($1)
+; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB4_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($3)
-; MIPS32R6O0-NEXT: and $1, $2, $4
-; MIPS32R6O0-NEXT: sc $1, 0($3)
-; MIPS32R6O0-NEXT: beqzc $1, $BB4_1
+; MIPS32R6O0-NEXT: ll $2, 0($1)
+; MIPS32R6O0-NEXT: and $3, $2, $4
+; MIPS32R6O0-NEXT: sc $3, 0($1)
+; MIPS32R6O0-NEXT: beqzc $3, $BB4_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
@@ -1213,13 +1213,13 @@ define i32 @AtomicLoadAnd32(i32 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB4_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($3)
-; MIPS64R6O0-NEXT: and $1, $2, $4
-; MIPS64R6O0-NEXT: sc $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1
+; MIPS64R6O0-NEXT: ll $2, 0($1)
+; MIPS64R6O0-NEXT: and $3, $2, $4
+; MIPS64R6O0-NEXT: sc $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB4_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -1335,14 +1335,14 @@ define i32 @AtomicLoadNand32(i32 signext %incr) nounwind {
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
-; MIPS32O0-NEXT: lw $3, %got(x)($1)
+; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB5_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($3)
-; MIPS32O0-NEXT: and $1, $2, $4
-; MIPS32O0-NEXT: nor $1, $zero, $1
-; MIPS32O0-NEXT: sc $1, 0($3)
-; MIPS32O0-NEXT: beqz $1, $BB5_1
+; MIPS32O0-NEXT: ll $2, 0($1)
+; MIPS32O0-NEXT: and $3, $2, $4
+; MIPS32O0-NEXT: nor $3, $zero, $3
+; MIPS32O0-NEXT: sc $3, 0($1)
+; MIPS32O0-NEXT: beqz $3, $BB5_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
@@ -1388,14 +1388,14 @@ define i32 @AtomicLoadNand32(i32 signext %incr) nounwind {
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: lw $3, %got(x)($1)
+; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB5_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($3)
-; MIPS32R6O0-NEXT: and $1, $2, $4
-; MIPS32R6O0-NEXT: nor $1, $zero, $1
-; MIPS32R6O0-NEXT: sc $1, 0($3)
-; MIPS32R6O0-NEXT: beqzc $1, $BB5_1
+; MIPS32R6O0-NEXT: ll $2, 0($1)
+; MIPS32R6O0-NEXT: and $3, $2, $4
+; MIPS32R6O0-NEXT: nor $3, $zero, $3
+; MIPS32R6O0-NEXT: sc $3, 0($1)
+; MIPS32R6O0-NEXT: beqzc $3, $BB5_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
@@ -1477,14 +1477,14 @@ define i32 @AtomicLoadNand32(i32 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB5_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($3)
-; MIPS64R6O0-NEXT: and $1, $2, $4
-; MIPS64R6O0-NEXT: nor $1, $zero, $1
-; MIPS64R6O0-NEXT: sc $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1
+; MIPS64R6O0-NEXT: ll $2, 0($1)
+; MIPS64R6O0-NEXT: and $3, $2, $4
+; MIPS64R6O0-NEXT: nor $3, $zero, $3
+; MIPS64R6O0-NEXT: sc $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB5_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -1609,16 +1609,17 @@ define i32 @AtomicSwap32(i32 signext %newval) nounwind {
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: sw $4, 4($sp)
-; MIPS32O0-NEXT: lw $4, 4($sp)
-; MIPS32O0-NEXT: lw $3, %got(x)($1)
+; MIPS32O0-NEXT: lw $2, 4($sp)
+; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB6_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($3)
-; MIPS32O0-NEXT: move $1, $4
-; MIPS32O0-NEXT: sc $1, 0($3)
-; MIPS32O0-NEXT: beqz $1, $BB6_1
+; MIPS32O0-NEXT: ll $3, 0($1)
+; MIPS32O0-NEXT: move $4, $2
+; MIPS32O0-NEXT: sc $4, 0($1)
+; MIPS32O0-NEXT: beqz $4, $BB6_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
+; MIPS32O0-NEXT: move $2, $3
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
@@ -1668,15 +1669,16 @@ define i32 @AtomicSwap32(i32 signext %newval) nounwind {
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: sw $4, 4($sp)
-; MIPS32R6O0-NEXT: lw $4, 4($sp)
-; MIPS32R6O0-NEXT: lw $3, %got(x)($1)
+; MIPS32R6O0-NEXT: lw $2, 4($sp)
+; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB6_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($3)
-; MIPS32R6O0-NEXT: move $1, $4
-; MIPS32R6O0-NEXT: sc $1, 0($3)
-; MIPS32R6O0-NEXT: beqzc $1, $BB6_1
+; MIPS32R6O0-NEXT: ll $3, 0($1)
+; MIPS32R6O0-NEXT: move $4, $2
+; MIPS32R6O0-NEXT: sc $4, 0($1)
+; MIPS32R6O0-NEXT: beqzc $4, $BB6_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
+; MIPS32R6O0-NEXT: move $2, $3
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
@@ -1762,17 +1764,18 @@ define i32 @AtomicSwap32(i32 signext %newval) nounwind {
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32)))
-; MIPS64R6O0-NEXT: move $2, $4
-; MIPS64R6O0-NEXT: sw $2, 12($sp)
-; MIPS64R6O0-NEXT: lw $4, 12($sp)
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
+; MIPS64R6O0-NEXT: sw $4, 12($sp)
+; MIPS64R6O0-NEXT: lw $2, 12($sp)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB6_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($3)
-; MIPS64R6O0-NEXT: move $1, $4
-; MIPS64R6O0-NEXT: sc $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1
+; MIPS64R6O0-NEXT: ll $3, 0($1)
+; MIPS64R6O0-NEXT: move $4, $2
+; MIPS64R6O0-NEXT: sc $4, 0($1)
+; MIPS64R6O0-NEXT: beqzc $4, .LBB6_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
+; MIPS64R6O0-NEXT: move $2, $3
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
@@ -1909,23 +1912,24 @@ define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: sw $5, 4($sp)
-; MIPS32O0-NEXT: lw $6, 4($sp)
-; MIPS32O0-NEXT: lw $3, %got(x)($1)
-; MIPS32O0-NEXT: move $5, $4
+; MIPS32O0-NEXT: lw $2, 4($sp)
+; MIPS32O0-NEXT: lw $1, %got(x)($1)
+; MIPS32O0-NEXT: move $3, $4
; MIPS32O0-NEXT: $BB7_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($3)
-; MIPS32O0-NEXT: bne $2, $5, $BB7_3
+; MIPS32O0-NEXT: ll $5, 0($1)
+; MIPS32O0-NEXT: bne $5, $3, $BB7_3
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: # in Loop: Header=BB7_1 Depth=1
-; MIPS32O0-NEXT: move $1, $6
-; MIPS32O0-NEXT: sc $1, 0($3)
-; MIPS32O0-NEXT: beqz $1, $BB7_1
+; MIPS32O0-NEXT: move $6, $2
+; MIPS32O0-NEXT: sc $6, 0($1)
+; MIPS32O0-NEXT: beqz $6, $BB7_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: $BB7_3: # %entry
-; MIPS32O0-NEXT: xor $1, $2, $4
+; MIPS32O0-NEXT: xor $1, $5, $4
; MIPS32O0-NEXT: sltiu $1, $1, 1
+; MIPS32O0-NEXT: move $2, $5
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
@@ -1982,18 +1986,19 @@ define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: sw $5, 4($sp)
-; MIPS32R6O0-NEXT: lw $5, 4($sp)
-; MIPS32R6O0-NEXT: lw $3, %got(x)($1)
+; MIPS32R6O0-NEXT: lw $2, 4($sp)
+; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB7_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($3)
-; MIPS32R6O0-NEXT: bnec $2, $4, $BB7_3
+; MIPS32R6O0-NEXT: ll $3, 0($1)
+; MIPS32R6O0-NEXT: bnec $3, $4, $BB7_3
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1
-; MIPS32R6O0-NEXT: move $1, $5
-; MIPS32R6O0-NEXT: sc $1, 0($3)
-; MIPS32R6O0-NEXT: beqzc $1, $BB7_1
+; MIPS32R6O0-NEXT: move $5, $2
+; MIPS32R6O0-NEXT: sc $5, 0($1)
+; MIPS32R6O0-NEXT: beqzc $5, $BB7_1
; MIPS32R6O0-NEXT: $BB7_3: # %entry
+; MIPS32R6O0-NEXT: move $2, $3
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
@@ -2095,20 +2100,21 @@ define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
-; MIPS64R6O0-NEXT: move $2, $5
-; MIPS64R6O0-NEXT: sw $2, 12($sp)
-; MIPS64R6O0-NEXT: lw $5, 12($sp)
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64
+; MIPS64R6O0-NEXT: sw $5, 12($sp)
+; MIPS64R6O0-NEXT: lw $2, 12($sp)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB7_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($3)
-; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3
+; MIPS64R6O0-NEXT: ll $3, 0($1)
+; MIPS64R6O0-NEXT: bnec $3, $4, .LBB7_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1
-; MIPS64R6O0-NEXT: move $1, $5
-; MIPS64R6O0-NEXT: sc $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1
+; MIPS64R6O0-NEXT: move $5, $2
+; MIPS64R6O0-NEXT: sc $5, 0($1)
+; MIPS64R6O0-NEXT: beqzc $5, .LBB7_1
; MIPS64R6O0-NEXT: .LBB7_3: # %entry
+; MIPS64R6O0-NEXT: move $2, $3
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
@@ -2280,34 +2286,34 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(y)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
-; MIPS32O0-NEXT: and $5, $1, $2
+; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
-; MIPS32O0-NEXT: sll $9, $1, 3
-; MIPS32O0-NEXT: ori $1, $zero, 255
-; MIPS32O0-NEXT: sllv $7, $1, $9
-; MIPS32O0-NEXT: nor $8, $zero, $7
-; MIPS32O0-NEXT: sllv $6, $4, $9
+; MIPS32O0-NEXT: sll $1, $1, 3
+; MIPS32O0-NEXT: ori $3, $zero, 255
+; MIPS32O0-NEXT: sllv $3, $3, $1
+; MIPS32O0-NEXT: nor $5, $zero, $3
+; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: $BB8_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($5)
-; MIPS32O0-NEXT: addu $3, $2, $6
-; MIPS32O0-NEXT: and $3, $3, $7
-; MIPS32O0-NEXT: and $4, $2, $8
-; MIPS32O0-NEXT: or $4, $4, $3
-; MIPS32O0-NEXT: sc $4, 0($5)
-; MIPS32O0-NEXT: beqz $4, $BB8_1
+; MIPS32O0-NEXT: ll $7, 0($2)
+; MIPS32O0-NEXT: addu $8, $7, $4
+; MIPS32O0-NEXT: and $8, $8, $3
+; MIPS32O0-NEXT: and $9, $7, $5
+; MIPS32O0-NEXT: or $9, $9, $8
+; MIPS32O0-NEXT: sc $9, 0($2)
+; MIPS32O0-NEXT: beqz $9, $BB8_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
-; MIPS32O0-NEXT: and $1, $2, $7
-; MIPS32O0-NEXT: srlv $1, $1, $9
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $1, $1, 24
+; MIPS32O0-NEXT: and $6, $7, $3
+; MIPS32O0-NEXT: srlv $6, $6, $1
+; MIPS32O0-NEXT: sll $6, $6, 24
+; MIPS32O0-NEXT: sra $6, $6, 24
; MIPS32O0-NEXT: # %bb.3: # %entry
-; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $2, $1, 24
+; MIPS32O0-NEXT: sll $2, $1, 24
+; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
@@ -2381,31 +2387,31 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
+; MIPS32R6O0-NEXT: move $2, $4
; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
-; MIPS32R6O0-NEXT: addiu $2, $zero, -4
-; MIPS32R6O0-NEXT: and $5, $1, $2
+; MIPS32R6O0-NEXT: addiu $3, $zero, -4
+; MIPS32R6O0-NEXT: and $3, $1, $3
; MIPS32R6O0-NEXT: andi $1, $1, 3
-; MIPS32R6O0-NEXT: sll $9, $1, 3
-; MIPS32R6O0-NEXT: ori $1, $zero, 255
-; MIPS32R6O0-NEXT: sllv $7, $1, $9
-; MIPS32R6O0-NEXT: nor $8, $zero, $7
-; MIPS32R6O0-NEXT: sllv $6, $4, $9
+; MIPS32R6O0-NEXT: sll $1, $1, 3
+; MIPS32R6O0-NEXT: ori $5, $zero, 255
+; MIPS32R6O0-NEXT: sllv $5, $5, $1
+; MIPS32R6O0-NEXT: nor $6, $zero, $5
+; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: $BB8_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($5)
-; MIPS32R6O0-NEXT: addu $3, $2, $6
-; MIPS32R6O0-NEXT: and $3, $3, $7
-; MIPS32R6O0-NEXT: and $4, $2, $8
-; MIPS32R6O0-NEXT: or $4, $4, $3
-; MIPS32R6O0-NEXT: sc $4, 0($5)
-; MIPS32R6O0-NEXT: beqzc $4, $BB8_1
+; MIPS32R6O0-NEXT: ll $8, 0($3)
+; MIPS32R6O0-NEXT: addu $9, $8, $4
+; MIPS32R6O0-NEXT: and $9, $9, $5
+; MIPS32R6O0-NEXT: and $10, $8, $6
+; MIPS32R6O0-NEXT: or $10, $10, $9
+; MIPS32R6O0-NEXT: sc $10, 0($3)
+; MIPS32R6O0-NEXT: beqzc $10, $BB8_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
-; MIPS32R6O0-NEXT: and $1, $2, $7
-; MIPS32R6O0-NEXT: srlv $1, $1, $9
-; MIPS32R6O0-NEXT: seb $1, $1
+; MIPS32R6O0-NEXT: and $7, $8, $5
+; MIPS32R6O0-NEXT: srlv $7, $7, $1
+; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
-; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
@@ -2548,33 +2554,33 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
-; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8)))
-; MIPS64R6O0-NEXT: move $1, $4
-; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2)
-; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
-; MIPS64R6O0-NEXT: and $5, $2, $3
-; MIPS64R6O0-NEXT: andi $2, $2, 3
-; MIPS64R6O0-NEXT: xori $2, $2, 3
-; MIPS64R6O0-NEXT: sll $9, $2, 3
-; MIPS64R6O0-NEXT: ori $2, $zero, 255
-; MIPS64R6O0-NEXT: sllv $7, $2, $9
-; MIPS64R6O0-NEXT: nor $8, $zero, $7
-; MIPS64R6O0-NEXT: sllv $6, $1, $9
+; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8)))
+; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
+; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
+; MIPS64R6O0-NEXT: and $2, $1, $2
+; MIPS64R6O0-NEXT: andi $1, $1, 3
+; MIPS64R6O0-NEXT: xori $1, $1, 3
+; MIPS64R6O0-NEXT: sll $1, $1, 3
+; MIPS64R6O0-NEXT: ori $3, $zero, 255
+; MIPS64R6O0-NEXT: sllv $3, $3, $1
+; MIPS64R6O0-NEXT: nor $5, $zero, $3
+; MIPS64R6O0-NEXT: sllv $4, $4, $1
; MIPS64R6O0-NEXT: .LBB8_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($5)
-; MIPS64R6O0-NEXT: addu $3, $2, $6
-; MIPS64R6O0-NEXT: and $3, $3, $7
-; MIPS64R6O0-NEXT: and $4, $2, $8
-; MIPS64R6O0-NEXT: or $4, $4, $3
-; MIPS64R6O0-NEXT: sc $4, 0($5)
-; MIPS64R6O0-NEXT: beqzc $4, .LBB8_1
+; MIPS64R6O0-NEXT: ll $7, 0($2)
+; MIPS64R6O0-NEXT: addu $8, $7, $4
+; MIPS64R6O0-NEXT: and $8, $8, $3
+; MIPS64R6O0-NEXT: and $9, $7, $5
+; MIPS64R6O0-NEXT: or $9, $9, $8
+; MIPS64R6O0-NEXT: sc $9, 0($2)
+; MIPS64R6O0-NEXT: beqzc $9, .LBB8_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $1, $2, $7
-; MIPS64R6O0-NEXT: srlv $1, $1, $9
-; MIPS64R6O0-NEXT: seb $1, $1
+; MIPS64R6O0-NEXT: and $6, $7, $3
+; MIPS64R6O0-NEXT: srlv $6, $6, $1
+; MIPS64R6O0-NEXT: seb $6, $6
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
@@ -2796,34 +2802,34 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(y)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
-; MIPS32O0-NEXT: and $5, $1, $2
+; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
-; MIPS32O0-NEXT: sll $9, $1, 3
-; MIPS32O0-NEXT: ori $1, $zero, 255
-; MIPS32O0-NEXT: sllv $7, $1, $9
-; MIPS32O0-NEXT: nor $8, $zero, $7
-; MIPS32O0-NEXT: sllv $6, $4, $9
+; MIPS32O0-NEXT: sll $1, $1, 3
+; MIPS32O0-NEXT: ori $3, $zero, 255
+; MIPS32O0-NEXT: sllv $3, $3, $1
+; MIPS32O0-NEXT: nor $5, $zero, $3
+; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: $BB9_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($5)
-; MIPS32O0-NEXT: subu $3, $2, $6
-; MIPS32O0-NEXT: and $3, $3, $7
-; MIPS32O0-NEXT: and $4, $2, $8
-; MIPS32O0-NEXT: or $4, $4, $3
-; MIPS32O0-NEXT: sc $4, 0($5)
-; MIPS32O0-NEXT: beqz $4, $BB9_1
+; MIPS32O0-NEXT: ll $7, 0($2)
+; MIPS32O0-NEXT: subu $8, $7, $4
+; MIPS32O0-NEXT: and $8, $8, $3
+; MIPS32O0-NEXT: and $9, $7, $5
+; MIPS32O0-NEXT: or $9, $9, $8
+; MIPS32O0-NEXT: sc $9, 0($2)
+; MIPS32O0-NEXT: beqz $9, $BB9_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
-; MIPS32O0-NEXT: and $1, $2, $7
-; MIPS32O0-NEXT: srlv $1, $1, $9
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $1, $1, 24
+; MIPS32O0-NEXT: and $6, $7, $3
+; MIPS32O0-NEXT: srlv $6, $6, $1
+; MIPS32O0-NEXT: sll $6, $6, 24
+; MIPS32O0-NEXT: sra $6, $6, 24
; MIPS32O0-NEXT: # %bb.3: # %entry
-; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $2, $1, 24
+; MIPS32O0-NEXT: sll $2, $1, 24
+; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
@@ -2897,31 +2903,31 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
+; MIPS32R6O0-NEXT: move $2, $4
; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
-; MIPS32R6O0-NEXT: addiu $2, $zero, -4
-; MIPS32R6O0-NEXT: and $5, $1, $2
+; MIPS32R6O0-NEXT: addiu $3, $zero, -4
+; MIPS32R6O0-NEXT: and $3, $1, $3
; MIPS32R6O0-NEXT: andi $1, $1, 3
-; MIPS32R6O0-NEXT: sll $9, $1, 3
-; MIPS32R6O0-NEXT: ori $1, $zero, 255
-; MIPS32R6O0-NEXT: sllv $7, $1, $9
-; MIPS32R6O0-NEXT: nor $8, $zero, $7
-; MIPS32R6O0-NEXT: sllv $6, $4, $9
+; MIPS32R6O0-NEXT: sll $1, $1, 3
+; MIPS32R6O0-NEXT: ori $5, $zero, 255
+; MIPS32R6O0-NEXT: sllv $5, $5, $1
+; MIPS32R6O0-NEXT: nor $6, $zero, $5
+; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: $BB9_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($5)
-; MIPS32R6O0-NEXT: subu $3, $2, $6
-; MIPS32R6O0-NEXT: and $3, $3, $7
-; MIPS32R6O0-NEXT: and $4, $2, $8
-; MIPS32R6O0-NEXT: or $4, $4, $3
-; MIPS32R6O0-NEXT: sc $4, 0($5)
-; MIPS32R6O0-NEXT: beqzc $4, $BB9_1
+; MIPS32R6O0-NEXT: ll $8, 0($3)
+; MIPS32R6O0-NEXT: subu $9, $8, $4
+; MIPS32R6O0-NEXT: and $9, $9, $5
+; MIPS32R6O0-NEXT: and $10, $8, $6
+; MIPS32R6O0-NEXT: or $10, $10, $9
+; MIPS32R6O0-NEXT: sc $10, 0($3)
+; MIPS32R6O0-NEXT: beqzc $10, $BB9_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
-; MIPS32R6O0-NEXT: and $1, $2, $7
-; MIPS32R6O0-NEXT: srlv $1, $1, $9
-; MIPS32R6O0-NEXT: seb $1, $1
+; MIPS32R6O0-NEXT: and $7, $8, $5
+; MIPS32R6O0-NEXT: srlv $7, $7, $1
+; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
-; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
@@ -3064,33 +3070,33 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
-; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
-; MIPS64R6O0-NEXT: move $1, $4
-; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2)
-; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
-; MIPS64R6O0-NEXT: and $5, $2, $3
-; MIPS64R6O0-NEXT: andi $2, $2, 3
-; MIPS64R6O0-NEXT: xori $2, $2, 3
-; MIPS64R6O0-NEXT: sll $9, $2, 3
-; MIPS64R6O0-NEXT: ori $2, $zero, 255
-; MIPS64R6O0-NEXT: sllv $7, $2, $9
-; MIPS64R6O0-NEXT: nor $8, $zero, $7
-; MIPS64R6O0-NEXT: sllv $6, $1, $9
+; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
+; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
+; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
+; MIPS64R6O0-NEXT: and $2, $1, $2
+; MIPS64R6O0-NEXT: andi $1, $1, 3
+; MIPS64R6O0-NEXT: xori $1, $1, 3
+; MIPS64R6O0-NEXT: sll $1, $1, 3
+; MIPS64R6O0-NEXT: ori $3, $zero, 255
+; MIPS64R6O0-NEXT: sllv $3, $3, $1
+; MIPS64R6O0-NEXT: nor $5, $zero, $3
+; MIPS64R6O0-NEXT: sllv $4, $4, $1
; MIPS64R6O0-NEXT: .LBB9_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($5)
-; MIPS64R6O0-NEXT: subu $3, $2, $6
-; MIPS64R6O0-NEXT: and $3, $3, $7
-; MIPS64R6O0-NEXT: and $4, $2, $8
-; MIPS64R6O0-NEXT: or $4, $4, $3
-; MIPS64R6O0-NEXT: sc $4, 0($5)
-; MIPS64R6O0-NEXT: beqzc $4, .LBB9_1
+; MIPS64R6O0-NEXT: ll $7, 0($2)
+; MIPS64R6O0-NEXT: subu $8, $7, $4
+; MIPS64R6O0-NEXT: and $8, $8, $3
+; MIPS64R6O0-NEXT: and $9, $7, $5
+; MIPS64R6O0-NEXT: or $9, $9, $8
+; MIPS64R6O0-NEXT: sc $9, 0($2)
+; MIPS64R6O0-NEXT: beqzc $9, .LBB9_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $1, $2, $7
-; MIPS64R6O0-NEXT: srlv $1, $1, $9
-; MIPS64R6O0-NEXT: seb $1, $1
+; MIPS64R6O0-NEXT: and $6, $7, $3
+; MIPS64R6O0-NEXT: srlv $6, $6, $1
+; MIPS64R6O0-NEXT: seb $6, $6
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
@@ -3314,35 +3320,35 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(y)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
-; MIPS32O0-NEXT: and $5, $1, $2
+; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
-; MIPS32O0-NEXT: sll $9, $1, 3
-; MIPS32O0-NEXT: ori $1, $zero, 255
-; MIPS32O0-NEXT: sllv $7, $1, $9
-; MIPS32O0-NEXT: nor $8, $zero, $7
-; MIPS32O0-NEXT: sllv $6, $4, $9
+; MIPS32O0-NEXT: sll $1, $1, 3
+; MIPS32O0-NEXT: ori $3, $zero, 255
+; MIPS32O0-NEXT: sllv $3, $3, $1
+; MIPS32O0-NEXT: nor $5, $zero, $3
+; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: $BB10_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($5)
-; MIPS32O0-NEXT: and $3, $2, $6
-; MIPS32O0-NEXT: nor $3, $zero, $3
-; MIPS32O0-NEXT: and $3, $3, $7
-; MIPS32O0-NEXT: and $4, $2, $8
-; MIPS32O0-NEXT: or $4, $4, $3
-; MIPS32O0-NEXT: sc $4, 0($5)
-; MIPS32O0-NEXT: beqz $4, $BB10_1
+; MIPS32O0-NEXT: ll $7, 0($2)
+; MIPS32O0-NEXT: and $8, $7, $4
+; MIPS32O0-NEXT: nor $8, $zero, $8
+; MIPS32O0-NEXT: and $8, $8, $3
+; MIPS32O0-NEXT: and $9, $7, $5
+; MIPS32O0-NEXT: or $9, $9, $8
+; MIPS32O0-NEXT: sc $9, 0($2)
+; MIPS32O0-NEXT: beqz $9, $BB10_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
-; MIPS32O0-NEXT: and $1, $2, $7
-; MIPS32O0-NEXT: srlv $1, $1, $9
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $1, $1, 24
+; MIPS32O0-NEXT: and $6, $7, $3
+; MIPS32O0-NEXT: srlv $6, $6, $1
+; MIPS32O0-NEXT: sll $6, $6, 24
+; MIPS32O0-NEXT: sra $6, $6, 24
; MIPS32O0-NEXT: # %bb.3: # %entry
-; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $2, $1, 24
+; MIPS32O0-NEXT: sll $2, $1, 24
+; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
@@ -3418,32 +3424,32 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
+; MIPS32R6O0-NEXT: move $2, $4
; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
-; MIPS32R6O0-NEXT: addiu $2, $zero, -4
-; MIPS32R6O0-NEXT: and $5, $1, $2
+; MIPS32R6O0-NEXT: addiu $3, $zero, -4
+; MIPS32R6O0-NEXT: and $3, $1, $3
; MIPS32R6O0-NEXT: andi $1, $1, 3
-; MIPS32R6O0-NEXT: sll $9, $1, 3
-; MIPS32R6O0-NEXT: ori $1, $zero, 255
-; MIPS32R6O0-NEXT: sllv $7, $1, $9
-; MIPS32R6O0-NEXT: nor $8, $zero, $7
-; MIPS32R6O0-NEXT: sllv $6, $4, $9
+; MIPS32R6O0-NEXT: sll $1, $1, 3
+; MIPS32R6O0-NEXT: ori $5, $zero, 255
+; MIPS32R6O0-NEXT: sllv $5, $5, $1
+; MIPS32R6O0-NEXT: nor $6, $zero, $5
+; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: $BB10_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($5)
-; MIPS32R6O0-NEXT: and $3, $2, $6
-; MIPS32R6O0-NEXT: nor $3, $zero, $3
-; MIPS32R6O0-NEXT: and $3, $3, $7
-; MIPS32R6O0-NEXT: and $4, $2, $8
-; MIPS32R6O0-NEXT: or $4, $4, $3
-; MIPS32R6O0-NEXT: sc $4, 0($5)
-; MIPS32R6O0-NEXT: beqzc $4, $BB10_1
+; MIPS32R6O0-NEXT: ll $8, 0($3)
+; MIPS32R6O0-NEXT: and $9, $8, $4
+; MIPS32R6O0-NEXT: nor $9, $zero, $9
+; MIPS32R6O0-NEXT: and $9, $9, $5
+; MIPS32R6O0-NEXT: and $10, $8, $6
+; MIPS32R6O0-NEXT: or $10, $10, $9
+; MIPS32R6O0-NEXT: sc $10, 0($3)
+; MIPS32R6O0-NEXT: beqzc $10, $BB10_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
-; MIPS32R6O0-NEXT: and $1, $2, $7
-; MIPS32R6O0-NEXT: srlv $1, $1, $9
-; MIPS32R6O0-NEXT: seb $1, $1
+; MIPS32R6O0-NEXT: and $7, $8, $5
+; MIPS32R6O0-NEXT: srlv $7, $7, $1
+; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
-; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
@@ -3590,34 +3596,34 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
-; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
-; MIPS64R6O0-NEXT: move $1, $4
-; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2)
-; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
-; MIPS64R6O0-NEXT: and $5, $2, $3
-; MIPS64R6O0-NEXT: andi $2, $2, 3
-; MIPS64R6O0-NEXT: xori $2, $2, 3
-; MIPS64R6O0-NEXT: sll $9, $2, 3
-; MIPS64R6O0-NEXT: ori $2, $zero, 255
-; MIPS64R6O0-NEXT: sllv $7, $2, $9
-; MIPS64R6O0-NEXT: nor $8, $zero, $7
-; MIPS64R6O0-NEXT: sllv $6, $1, $9
+; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
+; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
+; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
+; MIPS64R6O0-NEXT: and $2, $1, $2
+; MIPS64R6O0-NEXT: andi $1, $1, 3
+; MIPS64R6O0-NEXT: xori $1, $1, 3
+; MIPS64R6O0-NEXT: sll $1, $1, 3
+; MIPS64R6O0-NEXT: ori $3, $zero, 255
+; MIPS64R6O0-NEXT: sllv $3, $3, $1
+; MIPS64R6O0-NEXT: nor $5, $zero, $3
+; MIPS64R6O0-NEXT: sllv $4, $4, $1
; MIPS64R6O0-NEXT: .LBB10_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($5)
-; MIPS64R6O0-NEXT: and $3, $2, $6
-; MIPS64R6O0-NEXT: nor $3, $zero, $3
-; MIPS64R6O0-NEXT: and $3, $3, $7
-; MIPS64R6O0-NEXT: and $4, $2, $8
-; MIPS64R6O0-NEXT: or $4, $4, $3
-; MIPS64R6O0-NEXT: sc $4, 0($5)
-; MIPS64R6O0-NEXT: beqzc $4, .LBB10_1
+; MIPS64R6O0-NEXT: ll $7, 0($2)
+; MIPS64R6O0-NEXT: and $8, $7, $4
+; MIPS64R6O0-NEXT: nor $8, $zero, $8
+; MIPS64R6O0-NEXT: and $8, $8, $3
+; MIPS64R6O0-NEXT: and $9, $7, $5
+; MIPS64R6O0-NEXT: or $9, $9, $8
+; MIPS64R6O0-NEXT: sc $9, 0($2)
+; MIPS64R6O0-NEXT: beqzc $9, .LBB10_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $1, $2, $7
-; MIPS64R6O0-NEXT: srlv $1, $1, $9
-; MIPS64R6O0-NEXT: seb $1, $1
+; MIPS64R6O0-NEXT: and $6, $7, $3
+; MIPS64R6O0-NEXT: srlv $6, $6, $1
+; MIPS64R6O0-NEXT: seb $6, $6
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
@@ -3844,33 +3850,33 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(y)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
-; MIPS32O0-NEXT: and $5, $1, $2
+; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
-; MIPS32O0-NEXT: sll $9, $1, 3
-; MIPS32O0-NEXT: ori $1, $zero, 255
-; MIPS32O0-NEXT: sllv $7, $1, $9
-; MIPS32O0-NEXT: nor $8, $zero, $7
-; MIPS32O0-NEXT: sllv $6, $4, $9
+; MIPS32O0-NEXT: sll $1, $1, 3
+; MIPS32O0-NEXT: ori $3, $zero, 255
+; MIPS32O0-NEXT: sllv $3, $3, $1
+; MIPS32O0-NEXT: nor $5, $zero, $3
+; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: $BB11_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($5)
-; MIPS32O0-NEXT: and $3, $6, $7
-; MIPS32O0-NEXT: and $4, $2, $8
-; MIPS32O0-NEXT: or $4, $4, $3
-; MIPS32O0-NEXT: sc $4, 0($5)
-; MIPS32O0-NEXT: beqz $4, $BB11_1
+; MIPS32O0-NEXT: ll $7, 0($2)
+; MIPS32O0-NEXT: and $8, $4, $3
+; MIPS32O0-NEXT: and $9, $7, $5
+; MIPS32O0-NEXT: or $9, $9, $8
+; MIPS32O0-NEXT: sc $9, 0($2)
+; MIPS32O0-NEXT: beqz $9, $BB11_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
-; MIPS32O0-NEXT: and $1, $2, $7
-; MIPS32O0-NEXT: srlv $1, $1, $9
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $1, $1, 24
+; MIPS32O0-NEXT: and $6, $7, $3
+; MIPS32O0-NEXT: srlv $6, $6, $1
+; MIPS32O0-NEXT: sll $6, $6, 24
+; MIPS32O0-NEXT: sra $6, $6, 24
; MIPS32O0-NEXT: # %bb.3: # %entry
-; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $2, $1, 24
+; MIPS32O0-NEXT: sll $2, $1, 24
+; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
@@ -3942,30 +3948,30 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
+; MIPS32R6O0-NEXT: move $2, $4
; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
-; MIPS32R6O0-NEXT: addiu $2, $zero, -4
-; MIPS32R6O0-NEXT: and $5, $1, $2
+; MIPS32R6O0-NEXT: addiu $3, $zero, -4
+; MIPS32R6O0-NEXT: and $3, $1, $3
; MIPS32R6O0-NEXT: andi $1, $1, 3
-; MIPS32R6O0-NEXT: sll $9, $1, 3
-; MIPS32R6O0-NEXT: ori $1, $zero, 255
-; MIPS32R6O0-NEXT: sllv $7, $1, $9
-; MIPS32R6O0-NEXT: nor $8, $zero, $7
-; MIPS32R6O0-NEXT: sllv $6, $4, $9
+; MIPS32R6O0-NEXT: sll $1, $1, 3
+; MIPS32R6O0-NEXT: ori $5, $zero, 255
+; MIPS32R6O0-NEXT: sllv $5, $5, $1
+; MIPS32R6O0-NEXT: nor $6, $zero, $5
+; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: $BB11_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($5)
-; MIPS32R6O0-NEXT: and $3, $6, $7
-; MIPS32R6O0-NEXT: and $4, $2, $8
-; MIPS32R6O0-NEXT: or $4, $4, $3
-; MIPS32R6O0-NEXT: sc $4, 0($5)
-; MIPS32R6O0-NEXT: beqzc $4, $BB11_1
+; MIPS32R6O0-NEXT: ll $8, 0($3)
+; MIPS32R6O0-NEXT: and $9, $4, $5
+; MIPS32R6O0-NEXT: and $10, $8, $6
+; MIPS32R6O0-NEXT: or $10, $10, $9
+; MIPS32R6O0-NEXT: sc $10, 0($3)
+; MIPS32R6O0-NEXT: beqzc $10, $BB11_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
-; MIPS32R6O0-NEXT: and $1, $2, $7
-; MIPS32R6O0-NEXT: srlv $1, $1, $9
-; MIPS32R6O0-NEXT: seb $1, $1
+; MIPS32R6O0-NEXT: and $7, $8, $5
+; MIPS32R6O0-NEXT: srlv $7, $7, $1
+; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
-; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
@@ -4104,32 +4110,32 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
-; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
-; MIPS64R6O0-NEXT: move $1, $4
-; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2)
-; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
-; MIPS64R6O0-NEXT: and $5, $2, $3
-; MIPS64R6O0-NEXT: andi $2, $2, 3
-; MIPS64R6O0-NEXT: xori $2, $2, 3
-; MIPS64R6O0-NEXT: sll $9, $2, 3
-; MIPS64R6O0-NEXT: ori $2, $zero, 255
-; MIPS64R6O0-NEXT: sllv $7, $2, $9
-; MIPS64R6O0-NEXT: nor $8, $zero, $7
-; MIPS64R6O0-NEXT: sllv $6, $1, $9
+; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
+; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
+; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
+; MIPS64R6O0-NEXT: and $2, $1, $2
+; MIPS64R6O0-NEXT: andi $1, $1, 3
+; MIPS64R6O0-NEXT: xori $1, $1, 3
+; MIPS64R6O0-NEXT: sll $1, $1, 3
+; MIPS64R6O0-NEXT: ori $3, $zero, 255
+; MIPS64R6O0-NEXT: sllv $3, $3, $1
+; MIPS64R6O0-NEXT: nor $5, $zero, $3
+; MIPS64R6O0-NEXT: sllv $4, $4, $1
; MIPS64R6O0-NEXT: .LBB11_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($5)
-; MIPS64R6O0-NEXT: and $3, $6, $7
-; MIPS64R6O0-NEXT: and $4, $2, $8
-; MIPS64R6O0-NEXT: or $4, $4, $3
-; MIPS64R6O0-NEXT: sc $4, 0($5)
-; MIPS64R6O0-NEXT: beqzc $4, .LBB11_1
+; MIPS64R6O0-NEXT: ll $7, 0($2)
+; MIPS64R6O0-NEXT: and $8, $4, $3
+; MIPS64R6O0-NEXT: and $9, $7, $5
+; MIPS64R6O0-NEXT: or $9, $9, $8
+; MIPS64R6O0-NEXT: sc $9, 0($2)
+; MIPS64R6O0-NEXT: beqzc $9, .LBB11_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $1, $2, $7
-; MIPS64R6O0-NEXT: srlv $1, $1, $9
-; MIPS64R6O0-NEXT: seb $1, $1
+; MIPS64R6O0-NEXT: and $6, $7, $3
+; MIPS64R6O0-NEXT: srlv $6, $6, $1
+; MIPS64R6O0-NEXT: seb $6, $6
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
@@ -4348,44 +4354,42 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addiu $sp, $sp, -8
-; MIPS32O0-NEXT: addu $3, $2, $25
-; MIPS32O0-NEXT: move $1, $5
-; MIPS32O0-NEXT: move $2, $4
-; MIPS32O0-NEXT: lw $3, %got(y)($3)
-; MIPS32O0-NEXT: addiu $4, $zero, -4
-; MIPS32O0-NEXT: and $4, $3, $4
-; MIPS32O0-NEXT: andi $3, $3, 3
-; MIPS32O0-NEXT: sll $9, $3, 3
+; MIPS32O0-NEXT: addu $1, $2, $25
+; MIPS32O0-NEXT: lw $1, %got(y)($1)
+; MIPS32O0-NEXT: addiu $2, $zero, -4
+; MIPS32O0-NEXT: and $2, $1, $2
+; MIPS32O0-NEXT: andi $1, $1, 3
+; MIPS32O0-NEXT: sll $1, $1, 3
; MIPS32O0-NEXT: ori $3, $zero, 255
-; MIPS32O0-NEXT: sllv $5, $3, $9
-; MIPS32O0-NEXT: nor $7, $zero, $5
-; MIPS32O0-NEXT: andi $2, $2, 255
-; MIPS32O0-NEXT: sllv $6, $2, $9
-; MIPS32O0-NEXT: andi $1, $1, 255
-; MIPS32O0-NEXT: sllv $8, $1, $9
+; MIPS32O0-NEXT: sllv $3, $3, $1
+; MIPS32O0-NEXT: nor $6, $zero, $3
+; MIPS32O0-NEXT: andi $4, $4, 255
+; MIPS32O0-NEXT: sllv $4, $4, $1
+; MIPS32O0-NEXT: andi $5, $5, 255
+; MIPS32O0-NEXT: sllv $5, $5, $1
; MIPS32O0-NEXT: $BB12_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($4)
-; MIPS32O0-NEXT: and $3, $2, $5
-; MIPS32O0-NEXT: bne $3, $6, $BB12_3
+; MIPS32O0-NEXT: ll $8, 0($2)
+; MIPS32O0-NEXT: and $9, $8, $3
+; MIPS32O0-NEXT: bne $9, $4, $BB12_3
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: # in Loop: Header=BB12_1 Depth=1
-; MIPS32O0-NEXT: and $2, $2, $7
-; MIPS32O0-NEXT: or $2, $2, $8
-; MIPS32O0-NEXT: sc $2, 0($4)
-; MIPS32O0-NEXT: beqz $2, $BB12_1
+; MIPS32O0-NEXT: and $8, $8, $6
+; MIPS32O0-NEXT: or $8, $8, $5
+; MIPS32O0-NEXT: sc $8, 0($2)
+; MIPS32O0-NEXT: beqz $8, $BB12_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: $BB12_3: # %entry
-; MIPS32O0-NEXT: srlv $1, $3, $9
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $1, $1, 24
+; MIPS32O0-NEXT: srlv $7, $9, $1
+; MIPS32O0-NEXT: sll $7, $7, 24
+; MIPS32O0-NEXT: sra $7, $7, 24
; MIPS32O0-NEXT: # %bb.4: # %entry
-; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.5: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $2, $1, 24
+; MIPS32O0-NEXT: sll $2, $1, 24
+; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
@@ -4466,39 +4470,37 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
-; MIPS32R6O0-NEXT: addu $3, $2, $25
-; MIPS32R6O0-NEXT: move $1, $5
-; MIPS32R6O0-NEXT: move $2, $4
-; MIPS32R6O0-NEXT: # kill: def $a1 killed $at
-; MIPS32R6O0-NEXT: # kill: def $a0 killed $v0
-; MIPS32R6O0-NEXT: lw $3, %got(y)($3)
-; MIPS32R6O0-NEXT: addiu $4, $zero, -4
-; MIPS32R6O0-NEXT: and $4, $3, $4
-; MIPS32R6O0-NEXT: andi $3, $3, 3
-; MIPS32R6O0-NEXT: sll $9, $3, 3
-; MIPS32R6O0-NEXT: ori $3, $zero, 255
-; MIPS32R6O0-NEXT: sllv $5, $3, $9
-; MIPS32R6O0-NEXT: nor $7, $zero, $5
-; MIPS32R6O0-NEXT: andi $2, $2, 255
-; MIPS32R6O0-NEXT: sllv $6, $2, $9
-; MIPS32R6O0-NEXT: andi $1, $1, 255
-; MIPS32R6O0-NEXT: sllv $8, $1, $9
+; MIPS32R6O0-NEXT: addu $1, $2, $25
+; MIPS32R6O0-NEXT: move $2, $5
+; MIPS32R6O0-NEXT: move $3, $4
+; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
+; MIPS32R6O0-NEXT: addiu $6, $zero, -4
+; MIPS32R6O0-NEXT: and $6, $1, $6
+; MIPS32R6O0-NEXT: andi $1, $1, 3
+; MIPS32R6O0-NEXT: sll $1, $1, 3
+; MIPS32R6O0-NEXT: ori $7, $zero, 255
+; MIPS32R6O0-NEXT: sllv $7, $7, $1
+; MIPS32R6O0-NEXT: nor $8, $zero, $7
+; MIPS32R6O0-NEXT: andi $4, $4, 255
+; MIPS32R6O0-NEXT: sllv $4, $4, $1
+; MIPS32R6O0-NEXT: andi $5, $5, 255
+; MIPS32R6O0-NEXT: sllv $5, $5, $1
; MIPS32R6O0-NEXT: $BB12_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($4)
-; MIPS32R6O0-NEXT: and $3, $2, $5
-; MIPS32R6O0-NEXT: bnec $3, $6, $BB12_3
+; MIPS32R6O0-NEXT: ll $10, 0($6)
+; MIPS32R6O0-NEXT: and $11, $10, $7
+; MIPS32R6O0-NEXT: bnec $11, $4, $BB12_3
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1
-; MIPS32R6O0-NEXT: and $2, $2, $7
-; MIPS32R6O0-NEXT: or $2, $2, $8
-; MIPS32R6O0-NEXT: sc $2, 0($4)
-; MIPS32R6O0-NEXT: beqzc $2, $BB12_1
+; MIPS32R6O0-NEXT: and $10, $10, $8
+; MIPS32R6O0-NEXT: or $10, $10, $5
+; MIPS32R6O0-NEXT: sc $10, 0($6)
+; MIPS32R6O0-NEXT: beqzc $10, $BB12_1
; MIPS32R6O0-NEXT: $BB12_3: # %entry
-; MIPS32R6O0-NEXT: srlv $1, $3, $9
-; MIPS32R6O0-NEXT: seb $1, $1
+; MIPS32R6O0-NEXT: srlv $9, $11, $1
+; MIPS32R6O0-NEXT: seb $9, $9
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $9, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5: # %entry
; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
@@ -4658,38 +4660,38 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
-; MIPS64R6O0-NEXT: daddiu $3, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
-; MIPS64R6O0-NEXT: move $1, $5
-; MIPS64R6O0-NEXT: move $2, $4
-; MIPS64R6O0-NEXT: ld $3, %got_disp(y)($3)
-; MIPS64R6O0-NEXT: daddiu $4, $zero, -4
-; MIPS64R6O0-NEXT: and $4, $3, $4
-; MIPS64R6O0-NEXT: andi $3, $3, 3
-; MIPS64R6O0-NEXT: xori $3, $3, 3
-; MIPS64R6O0-NEXT: sll $9, $3, 3
+; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64
+; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
+; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
+; MIPS64R6O0-NEXT: and $2, $1, $2
+; MIPS64R6O0-NEXT: andi $1, $1, 3
+; MIPS64R6O0-NEXT: xori $1, $1, 3
+; MIPS64R6O0-NEXT: sll $1, $1, 3
; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $5, $3, $9
-; MIPS64R6O0-NEXT: nor $7, $zero, $5
-; MIPS64R6O0-NEXT: andi $2, $2, 255
-; MIPS64R6O0-NEXT: sllv $6, $2, $9
-; MIPS64R6O0-NEXT: andi $1, $1, 255
-; MIPS64R6O0-NEXT: sllv $8, $1, $9
+; MIPS64R6O0-NEXT: sllv $3, $3, $1
+; MIPS64R6O0-NEXT: nor $6, $zero, $3
+; MIPS64R6O0-NEXT: andi $4, $4, 255
+; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $5, $5, 255
+; MIPS64R6O0-NEXT: sllv $5, $5, $1
; MIPS64R6O0-NEXT: .LBB12_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($4)
-; MIPS64R6O0-NEXT: and $3, $2, $5
-; MIPS64R6O0-NEXT: bnec $3, $6, .LBB12_3
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: and $9, $8, $3
+; MIPS64R6O0-NEXT: bnec $9, $4, .LBB12_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1
-; MIPS64R6O0-NEXT: and $2, $2, $7
-; MIPS64R6O0-NEXT: or $2, $2, $8
-; MIPS64R6O0-NEXT: sc $2, 0($4)
-; MIPS64R6O0-NEXT: beqzc $2, .LBB12_1
+; MIPS64R6O0-NEXT: and $8, $8, $6
+; MIPS64R6O0-NEXT: or $8, $8, $5
+; MIPS64R6O0-NEXT: sc $8, 0($2)
+; MIPS64R6O0-NEXT: beqzc $8, .LBB12_1
; MIPS64R6O0-NEXT: .LBB12_3: # %entry
-; MIPS64R6O0-NEXT: srlv $1, $3, $9
-; MIPS64R6O0-NEXT: seb $1, $1
+; MIPS64R6O0-NEXT: srlv $7, $9, $1
+; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.4: # %entry
-; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5: # %entry
; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
@@ -4933,47 +4935,44 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n
; MIPS32O0-LABEL: AtomicCmpSwapRes8:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: addiu $sp, $sp, -8
-; MIPS32O0-NEXT: move $1, $6
-; MIPS32O0-NEXT: move $2, $5
-; MIPS32O0-NEXT: move $3, $4
-; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
-; MIPS32O0-NEXT: addiu $4, $zero, -4
-; MIPS32O0-NEXT: and $4, $3, $4
-; MIPS32O0-NEXT: andi $3, $3, 3
-; MIPS32O0-NEXT: sll $9, $3, 3
+; MIPS32O0-NEXT: addiu $1, $zero, -4
+; MIPS32O0-NEXT: and $1, $4, $1
+; MIPS32O0-NEXT: andi $2, $4, 3
+; MIPS32O0-NEXT: sll $2, $2, 3
; MIPS32O0-NEXT: ori $3, $zero, 255
-; MIPS32O0-NEXT: sllv $5, $3, $9
-; MIPS32O0-NEXT: nor $7, $zero, $5
-; MIPS32O0-NEXT: andi $2, $2, 255
-; MIPS32O0-NEXT: sllv $6, $2, $9
-; MIPS32O0-NEXT: andi $1, $1, 255
-; MIPS32O0-NEXT: sllv $8, $1, $9
+; MIPS32O0-NEXT: sllv $3, $3, $2
+; MIPS32O0-NEXT: nor $4, $zero, $3
+; MIPS32O0-NEXT: andi $7, $5, 255
+; MIPS32O0-NEXT: sllv $7, $7, $2
+; MIPS32O0-NEXT: andi $6, $6, 255
+; MIPS32O0-NEXT: sllv $6, $6, $2
; MIPS32O0-NEXT: $BB13_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($4)
-; MIPS32O0-NEXT: and $3, $2, $5
-; MIPS32O0-NEXT: bne $3, $6, $BB13_3
+; MIPS32O0-NEXT: ll $9, 0($1)
+; MIPS32O0-NEXT: and $10, $9, $3
+; MIPS32O0-NEXT: bne $10, $7, $BB13_3
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: # in Loop: Header=BB13_1 Depth=1
-; MIPS32O0-NEXT: and $2, $2, $7
-; MIPS32O0-NEXT: or $2, $2, $8
-; MIPS32O0-NEXT: sc $2, 0($4)
-; MIPS32O0-NEXT: beqz $2, $BB13_1
+; MIPS32O0-NEXT: and $9, $9, $4
+; MIPS32O0-NEXT: or $9, $9, $6
+; MIPS32O0-NEXT: sc $9, 0($1)
+; MIPS32O0-NEXT: beqz $9, $BB13_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: $BB13_3: # %entry
-; MIPS32O0-NEXT: srlv $1, $3, $9
-; MIPS32O0-NEXT: sll $1, $1, 24
-; MIPS32O0-NEXT: sra $1, $1, 24
+; MIPS32O0-NEXT: srlv $8, $10, $2
+; MIPS32O0-NEXT: sll $8, $8, 24
+; MIPS32O0-NEXT: sra $8, $8, 24
; MIPS32O0-NEXT: # %bb.4: # %entry
-; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: sw $8, 0($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.5: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
-; MIPS32O0-NEXT: sll $2, $2, 24
+; MIPS32O0-NEXT: sll $2, $1, 24
; MIPS32O0-NEXT: sra $2, $2, 24
-; MIPS32O0-NEXT: xor $1, $1, $2
-; MIPS32O0-NEXT: sltiu $2, $1, 1
+; MIPS32O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload
+; MIPS32O0-NEXT: xor $2, $3, $2
+; MIPS32O0-NEXT: sltiu $2, $2, 1
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
@@ -5049,40 +5048,37 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: move $1, $6
; MIPS32R6O0-NEXT: move $2, $5
-; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: move $3, $4
-; MIPS32R6O0-NEXT: # kill: def $a2 killed $at
-; MIPS32R6O0-NEXT: # kill: def $a1 killed $v0
-; MIPS32R6O0-NEXT: addiu $4, $zero, -4
-; MIPS32R6O0-NEXT: and $4, $3, $4
-; MIPS32R6O0-NEXT: andi $3, $3, 3
-; MIPS32R6O0-NEXT: sll $9, $3, 3
-; MIPS32R6O0-NEXT: ori $3, $zero, 255
-; MIPS32R6O0-NEXT: sllv $5, $3, $9
-; MIPS32R6O0-NEXT: nor $7, $zero, $5
-; MIPS32R6O0-NEXT: andi $2, $2, 255
-; MIPS32R6O0-NEXT: sllv $6, $2, $9
-; MIPS32R6O0-NEXT: andi $1, $1, 255
-; MIPS32R6O0-NEXT: sllv $8, $1, $9
+; MIPS32R6O0-NEXT: addiu $3, $zero, -4
+; MIPS32R6O0-NEXT: and $3, $4, $3
+; MIPS32R6O0-NEXT: andi $4, $4, 3
+; MIPS32R6O0-NEXT: sll $4, $4, 3
+; MIPS32R6O0-NEXT: ori $7, $zero, 255
+; MIPS32R6O0-NEXT: sllv $7, $7, $4
+; MIPS32R6O0-NEXT: nor $8, $zero, $7
+; MIPS32R6O0-NEXT: andi $9, $5, 255
+; MIPS32R6O0-NEXT: sllv $9, $9, $4
+; MIPS32R6O0-NEXT: andi $6, $6, 255
+; MIPS32R6O0-NEXT: sllv $6, $6, $4
; MIPS32R6O0-NEXT: $BB13_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($4)
-; MIPS32R6O0-NEXT: and $3, $2, $5
-; MIPS32R6O0-NEXT: bnec $3, $6, $BB13_3
+; MIPS32R6O0-NEXT: ll $11, 0($3)
+; MIPS32R6O0-NEXT: and $12, $11, $7
+; MIPS32R6O0-NEXT: bnec $12, $9, $BB13_3
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1
-; MIPS32R6O0-NEXT: and $2, $2, $7
-; MIPS32R6O0-NEXT: or $2, $2, $8
-; MIPS32R6O0-NEXT: sc $2, 0($4)
-; MIPS32R6O0-NEXT: beqzc $2, $BB13_1
+; MIPS32R6O0-NEXT: and $11, $11, $8
+; MIPS32R6O0-NEXT: or $11, $11, $6
+; MIPS32R6O0-NEXT: sc $11, 0($3)
+; MIPS32R6O0-NEXT: beqzc $11, $BB13_1
; MIPS32R6O0-NEXT: $BB13_3: # %entry
-; MIPS32R6O0-NEXT: srlv $1, $3, $9
-; MIPS32R6O0-NEXT: seb $1, $1
+; MIPS32R6O0-NEXT: srlv $10, $12, $4
+; MIPS32R6O0-NEXT: seb $10, $10
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $10, 0($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5: # %entry
-; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: xor $1, $1, $2
; MIPS32R6O0-NEXT: sltiu $2, $1, 1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
@@ -5231,41 +5227,40 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n
; MIPS64R6O0-LABEL: AtomicCmpSwapRes8:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
-; MIPS64R6O0-NEXT: move $3, $4
-; MIPS64R6O0-NEXT: move $1, $6
-; MIPS64R6O0-NEXT: move $2, $5
-; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
-; MIPS64R6O0-NEXT: daddiu $4, $zero, -4
-; MIPS64R6O0-NEXT: and $4, $3, $4
-; MIPS64R6O0-NEXT: andi $3, $3, 3
-; MIPS64R6O0-NEXT: xori $3, $3, 3
-; MIPS64R6O0-NEXT: sll $9, $3, 3
+; MIPS64R6O0-NEXT: # kill: def $a2 killed $a2 killed $a2_64
+; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64
+; MIPS64R6O0-NEXT: daddiu $1, $zero, -4
+; MIPS64R6O0-NEXT: and $1, $4, $1
+; MIPS64R6O0-NEXT: andi $2, $4, 3
+; MIPS64R6O0-NEXT: xori $2, $2, 3
+; MIPS64R6O0-NEXT: sll $2, $2, 3
; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $5, $3, $9
-; MIPS64R6O0-NEXT: nor $7, $zero, $5
-; MIPS64R6O0-NEXT: andi $2, $2, 255
-; MIPS64R6O0-NEXT: sllv $6, $2, $9
-; MIPS64R6O0-NEXT: andi $1, $1, 255
-; MIPS64R6O0-NEXT: sllv $8, $1, $9
+; MIPS64R6O0-NEXT: sllv $3, $3, $2
+; MIPS64R6O0-NEXT: nor $4, $zero, $3
+; MIPS64R6O0-NEXT: andi $7, $5, 255
+; MIPS64R6O0-NEXT: sllv $7, $7, $2
+; MIPS64R6O0-NEXT: andi $6, $6, 255
+; MIPS64R6O0-NEXT: sllv $6, $6, $2
; MIPS64R6O0-NEXT: .LBB13_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($4)
-; MIPS64R6O0-NEXT: and $3, $2, $5
-; MIPS64R6O0-NEXT: bnec $3, $6, .LBB13_3
+; MIPS64R6O0-NEXT: ll $9, 0($1)
+; MIPS64R6O0-NEXT: and $10, $9, $3
+; MIPS64R6O0-NEXT: bnec $10, $7, .LBB13_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1
-; MIPS64R6O0-NEXT: and $2, $2, $7
-; MIPS64R6O0-NEXT: or $2, $2, $8
-; MIPS64R6O0-NEXT: sc $2, 0($4)
-; MIPS64R6O0-NEXT: beqzc $2, .LBB13_1
+; MIPS64R6O0-NEXT: and $9, $9, $4
+; MIPS64R6O0-NEXT: or $9, $9, $6
+; MIPS64R6O0-NEXT: sc $9, 0($1)
+; MIPS64R6O0-NEXT: beqzc $9, .LBB13_1
; MIPS64R6O0-NEXT: .LBB13_3: # %entry
-; MIPS64R6O0-NEXT: srlv $1, $3, $9
-; MIPS64R6O0-NEXT: seb $1, $1
+; MIPS64R6O0-NEXT: srlv $8, $10, $2
+; MIPS64R6O0-NEXT: seb $8, $8
; MIPS64R6O0-NEXT: # %bb.4: # %entry
-; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5: # %entry
-; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
-; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: xor $1, $1, $2
; MIPS64R6O0-NEXT: sltiu $2, $1, 1
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
@@ -5507,34 +5502,34 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind {
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(z)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
-; MIPS32O0-NEXT: and $5, $1, $2
+; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
-; MIPS32O0-NEXT: sll $9, $1, 3
-; MIPS32O0-NEXT: ori $1, $zero, 65535
-; MIPS32O0-NEXT: sllv $7, $1, $9
-; MIPS32O0-NEXT: nor $8, $zero, $7
-; MIPS32O0-NEXT: sllv $6, $4, $9
+; MIPS32O0-NEXT: sll $1, $1, 3
+; MIPS32O0-NEXT: ori $3, $zero, 65535
+; MIPS32O0-NEXT: sllv $3, $3, $1
+; MIPS32O0-NEXT: nor $5, $zero, $3
+; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: $BB14_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($5)
-; MIPS32O0-NEXT: addu $3, $2, $6
-; MIPS32O0-NEXT: and $3, $3, $7
-; MIPS32O0-NEXT: and $4, $2, $8
-; MIPS32O0-NEXT: or $4, $4, $3
-; MIPS32O0-NEXT: sc $4, 0($5)
-; MIPS32O0-NEXT: beqz $4, $BB14_1
+; MIPS32O0-NEXT: ll $7, 0($2)
+; MIPS32O0-NEXT: addu $8, $7, $4
+; MIPS32O0-NEXT: and $8, $8, $3
+; MIPS32O0-NEXT: and $9, $7, $5
+; MIPS32O0-NEXT: or $9, $9, $8
+; MIPS32O0-NEXT: sc $9, 0($2)
+; MIPS32O0-NEXT: beqz $9, $BB14_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
-; MIPS32O0-NEXT: and $1, $2, $7
-; MIPS32O0-NEXT: srlv $1, $1, $9
-; MIPS32O0-NEXT: sll $1, $1, 16
-; MIPS32O0-NEXT: sra $1, $1, 16
+; MIPS32O0-NEXT: and $6, $7, $3
+; MIPS32O0-NEXT: srlv $6, $6, $1
+; MIPS32O0-NEXT: sll $6, $6, 16
+; MIPS32O0-NEXT: sra $6, $6, 16
; MIPS32O0-NEXT: # %bb.3: # %entry
-; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32O0-NEXT: sll $1, $1, 16
-; MIPS32O0-NEXT: sra $2, $1, 16
+; MIPS32O0-NEXT: sll $2, $1, 16
+; MIPS32O0-NEXT: sra $2, $2, 16
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
@@ -5608,31 +5603,31 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind {
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
-; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
+; MIPS32R6O0-NEXT: move $2, $4
; MIPS32R6O0-NEXT: lw $1, %got(z)($1)
-; MIPS32R6O0-NEXT: addiu $2, $zero, -4
-; MIPS32R6O0-NEXT: and $5, $1, $2
+; MIPS32R6O0-NEXT: addiu $3, $zero, -4
+; MIPS32R6O0-NEXT: and $3, $1, $3
; MIPS32R6O0-NEXT: andi $1, $1, 3
-; MIPS32R6O0-NEXT: sll $9, $1, 3
-; MIPS32R6O0-NEXT: ori $1, $zero, 65535
-; MIPS32R6O0-NEXT: sllv $7, $1, $9
-; MIPS32R6O0-NEXT: nor $8, $zero, $7
-; MIPS32R6O0-NEXT: sllv $6, $4, $9
+; MIPS32R6O0-NEXT: sll $1, $1, 3
+; MIPS32R6O0-NEXT: ori $5, $zero, 65535
+; MIPS32R6O0-NEXT: sllv $5, $5, $1
+; MIPS32R6O0-NEXT: nor $6, $zero, $5
+; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: $BB14_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($5)
-; MIPS32R6O0-NEXT: addu $3, $2, $6
-; MIPS32R6O0-NEXT: and $3, $3, $7
-; MIPS32R6O0-NEXT: and $4, $2, $8
-; MIPS32R6O0-NEXT: or $4, $4, $3
-; MIPS32R6O0-NEXT: sc $4, 0($5)
-; MIPS32R6O0-NEXT: beqzc $4, $BB14_1
+; MIPS32R6O0-NEXT: ll $8, 0($3)
+; MIPS32R6O0-NEXT: addu $9, $8, $4
+; MIPS32R6O0-NEXT: and $9, $9, $5
+; MIPS32R6O0-NEXT: and $10, $8, $6
+; MIPS32R6O0-NEXT: or $10, $10, $9
+; MIPS32R6O0-NEXT: sc $10, 0($3)
+; MIPS32R6O0-NEXT: beqzc $10, $BB14_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
-; MIPS32R6O0-NEXT: and $1, $2, $7
-; MIPS32R6O0-NEXT: srlv $1, $1, $9
-; MIPS32R6O0-NEXT: seh $1, $1
+; MIPS32R6O0-NEXT: and $7, $8, $5
+; MIPS32R6O0-NEXT: srlv $7, $7, $1
+; MIPS32R6O0-NEXT: seh $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
-; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seh $2, $1
@@ -5775,33 +5770,33 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
-; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16)))
-; MIPS64R6O0-NEXT: move $1, $4
-; MIPS64R6O0-NEXT: ld $2, %got_disp(z)($2)
-; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
-; MIPS64R6O0-NEXT: and $5, $2, $3
-; MIPS64R6O0-NEXT: andi $2, $2, 3
-; MIPS64R6O0-NEXT: xori $2, $2, 2
-; MIPS64R6O0-NEXT: sll $9, $2, 3
-; MIPS64R6O0-NEXT: ori $2, $zero, 65535
-; MIPS64R6O0-NEXT: sllv $7, $2, $9
-; MIPS64R6O0-NEXT: nor $8, $zero, $7
-; MIPS64R6O0-NEXT: sllv $6, $1, $9
+; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16)))
+; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
+; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1)
+; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
+; MIPS64R6O0-NEXT: and $2, $1, $2
+; MIPS64R6O0-NEXT: andi $1, $1, 3
+; MIPS64R6O0-NEXT: xori $1, $1, 2
+; MIPS64R6O0-NEXT: sll $1, $1, 3
+; MIPS64R6O0-NEXT: ori $3, $zero, 65535
+; MIPS64R6O0-NEXT: sllv $3, $3, $1
+; MIPS64R6O0-NEXT: nor $5, $zero, $3
+; MIPS64R6O0-NEXT: sllv $4, $4, $1
; MIPS64R6O0-NEXT: .LBB14_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($5)
-; MIPS64R6O0-NEXT: addu $3, $2, $6
-; MIPS64R6O0-NEXT: and $3, $3, $7
-; MIPS64R6O0-NEXT: and $4, $2, $8
-; MIPS64R6O0-NEXT: or $4, $4, $3
-; MIPS64R6O0-NEXT: sc $4, 0($5)
-; MIPS64R6O0-NEXT: beqzc $4, .LBB14_1
+; MIPS64R6O0-NEXT: ll $7, 0($2)
+; MIPS64R6O0-NEXT: addu $8, $7, $4
+; MIPS64R6O0-NEXT: and $8, $8, $3
+; MIPS64R6O0-NEXT: and $9, $7, $5
+; MIPS64R6O0-NEXT: or $9, $9, $8
+; MIPS64R6O0-NEXT: sc $9, 0($2)
+; MIPS64R6O0-NEXT: beqzc $9, .LBB14_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $1, $2, $7
-; MIPS64R6O0-NEXT: srlv $1, $1, $9
-; MIPS64R6O0-NEXT: seh $1, $1
+; MIPS64R6O0-NEXT: and $6, $7, $3
+; MIPS64R6O0-NEXT: srlv $6, $6, $1
+; MIPS64R6O0-NEXT: seh $6, $6
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seh $2, $1
@@ -6030,47 +6025,46 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) {
; MIPS32O0: # %bb.0:
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: .cfi_def_cfa_offset 8
-; MIPS32O0-NEXT: move $1, $7
-; MIPS32O0-NEXT: move $3, $4
-; MIPS32O0-NEXT: addu $2, $5, $6
-; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: addu $1, $5, $6
; MIPS32O0-NEXT: sync
-; MIPS32O0-NEXT: addiu $4, $zero, -4
-; MIPS32O0-NEXT: and $4, $3, $4
-; MIPS32O0-NEXT: andi $3, $3, 3
-; MIPS32O0-NEXT: sll $9, $3, 3
-; MIPS32O0-NEXT: ori $3, $zero, 65535
-; MIPS32O0-NEXT: sllv $5, $3, $9
-; MIPS32O0-NEXT: nor $7, $zero, $5
-; MIPS32O0-NEXT: andi $2, $2, 65535
-; MIPS32O0-NEXT: sllv $6, $2, $9
-; MIPS32O0-NEXT: andi $1, $1, 65535
-; MIPS32O0-NEXT: sllv $8, $1, $9
+; MIPS32O0-NEXT: addiu $2, $zero, -4
+; MIPS32O0-NEXT: and $2, $4, $2
+; MIPS32O0-NEXT: andi $3, $4, 3
+; MIPS32O0-NEXT: sll $3, $3, 3
+; MIPS32O0-NEXT: ori $4, $zero, 65535
+; MIPS32O0-NEXT: sllv $4, $4, $3
+; MIPS32O0-NEXT: nor $5, $zero, $4
+; MIPS32O0-NEXT: andi $6, $1, 65535
+; MIPS32O0-NEXT: sllv $6, $6, $3
+; MIPS32O0-NEXT: andi $7, $7, 65535
+; MIPS32O0-NEXT: sllv $7, $7, $3
; MIPS32O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($4)
-; MIPS32O0-NEXT: and $3, $2, $5
-; MIPS32O0-NEXT: bne $3, $6, $BB15_3
+; MIPS32O0-NEXT: ll $9, 0($2)
+; MIPS32O0-NEXT: and $10, $9, $4
+; MIPS32O0-NEXT: bne $10, $6, $BB15_3
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
-; MIPS32O0-NEXT: and $2, $2, $7
-; MIPS32O0-NEXT: or $2, $2, $8
-; MIPS32O0-NEXT: sc $2, 0($4)
-; MIPS32O0-NEXT: beqz $2, $BB15_1
+; MIPS32O0-NEXT: and $9, $9, $5
+; MIPS32O0-NEXT: or $9, $9, $7
+; MIPS32O0-NEXT: sc $9, 0($2)
+; MIPS32O0-NEXT: beqz $9, $BB15_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: $BB15_3:
-; MIPS32O0-NEXT: srlv $1, $3, $9
-; MIPS32O0-NEXT: sll $1, $1, 16
-; MIPS32O0-NEXT: sra $1, $1, 16
+; MIPS32O0-NEXT: srlv $8, $10, $3
+; MIPS32O0-NEXT: sll $8, $8, 16
+; MIPS32O0-NEXT: sra $8, $8, 16
; MIPS32O0-NEXT: # %bb.4:
; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: sw $8, 0($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.5:
-; MIPS32O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
-; MIPS32O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
-; MIPS32O0-NEXT: sll $1, $1, 16
-; MIPS32O0-NEXT: sra $1, $1, 16
-; MIPS32O0-NEXT: xor $1, $2, $1
-; MIPS32O0-NEXT: sltiu $3, $1, 1
+; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32O0-NEXT: sll $2, $1, 16
+; MIPS32O0-NEXT: sra $2, $2, 16
+; MIPS32O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload
+; MIPS32O0-NEXT: xor $2, $3, $2
+; MIPS32O0-NEXT: sltiu $3, $2, 1
; MIPS32O0-NEXT: sync
+; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
@@ -6151,45 +6145,44 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) {
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 8
; MIPS32R6O0-NEXT: move $1, $7
-; MIPS32R6O0-NEXT: move $3, $4
-; MIPS32R6O0-NEXT: # kill: def $a3 killed $at
-; MIPS32R6O0-NEXT: # kill: def $v0 killed $a2
-; MIPS32R6O0-NEXT: # kill: def $v0 killed $a1
-; MIPS32R6O0-NEXT: addu $2, $5, $6
-; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: move $2, $6
+; MIPS32R6O0-NEXT: move $3, $5
+; MIPS32R6O0-NEXT: addu $5, $5, $6
; MIPS32R6O0-NEXT: sync
-; MIPS32R6O0-NEXT: addiu $4, $zero, -4
-; MIPS32R6O0-NEXT: and $4, $3, $4
-; MIPS32R6O0-NEXT: andi $3, $3, 3
-; MIPS32R6O0-NEXT: sll $9, $3, 3
-; MIPS32R6O0-NEXT: ori $3, $zero, 65535
-; MIPS32R6O0-NEXT: sllv $5, $3, $9
-; MIPS32R6O0-NEXT: nor $7, $zero, $5
-; MIPS32R6O0-NEXT: andi $2, $2, 65535
-; MIPS32R6O0-NEXT: sllv $6, $2, $9
-; MIPS32R6O0-NEXT: andi $1, $1, 65535
-; MIPS32R6O0-NEXT: sllv $8, $1, $9
+; MIPS32R6O0-NEXT: addiu $6, $zero, -4
+; MIPS32R6O0-NEXT: and $6, $4, $6
+; MIPS32R6O0-NEXT: andi $4, $4, 3
+; MIPS32R6O0-NEXT: sll $4, $4, 3
+; MIPS32R6O0-NEXT: ori $8, $zero, 65535
+; MIPS32R6O0-NEXT: sllv $8, $8, $4
+; MIPS32R6O0-NEXT: nor $9, $zero, $8
+; MIPS32R6O0-NEXT: andi $10, $5, 65535
+; MIPS32R6O0-NEXT: sllv $10, $10, $4
+; MIPS32R6O0-NEXT: andi $7, $7, 65535
+; MIPS32R6O0-NEXT: sllv $7, $7, $4
; MIPS32R6O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($4)
-; MIPS32R6O0-NEXT: and $3, $2, $5
-; MIPS32R6O0-NEXT: bnec $3, $6, $BB15_3
+; MIPS32R6O0-NEXT: ll $12, 0($6)
+; MIPS32R6O0-NEXT: and $13, $12, $8
+; MIPS32R6O0-NEXT: bnec $13, $10, $BB15_3
; MIPS32R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
-; MIPS32R6O0-NEXT: and $2, $2, $7
-; MIPS32R6O0-NEXT: or $2, $2, $8
-; MIPS32R6O0-NEXT: sc $2, 0($4)
-; MIPS32R6O0-NEXT: beqzc $2, $BB15_1
+; MIPS32R6O0-NEXT: and $12, $12, $9
+; MIPS32R6O0-NEXT: or $12, $12, $7
+; MIPS32R6O0-NEXT: sc $12, 0($6)
+; MIPS32R6O0-NEXT: beqzc $12, $BB15_1
; MIPS32R6O0-NEXT: $BB15_3:
-; MIPS32R6O0-NEXT: srlv $1, $3, $9
-; MIPS32R6O0-NEXT: seh $1, $1
+; MIPS32R6O0-NEXT: srlv $11, $13, $4
+; MIPS32R6O0-NEXT: seh $11, $11
; MIPS32R6O0-NEXT: # %bb.4:
-; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $11, 0($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5:
-; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
-; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
-; MIPS32R6O0-NEXT: seh $1, $1
-; MIPS32R6O0-NEXT: xor $1, $2, $1
-; MIPS32R6O0-NEXT: sltiu $3, $1, 1
+; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: seh $2, $1
+; MIPS32R6O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: xor $2, $3, $2
+; MIPS32R6O0-NEXT: sltiu $3, $2, 1
; MIPS32R6O0-NEXT: sync
+; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
@@ -6358,49 +6351,49 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) {
; MIPS64R6O0: # %bb.0:
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R6O0-NEXT: move $3, $4
-; MIPS64R6O0-NEXT: move $1, $7
-; MIPS64R6O0-NEXT: sll $1, $1, 0
-; MIPS64R6O0-NEXT: move $2, $6
-; MIPS64R6O0-NEXT: sll $4, $2, 0
-; MIPS64R6O0-NEXT: move $2, $5
-; MIPS64R6O0-NEXT: sll $2, $2, 0
-; MIPS64R6O0-NEXT: addu $2, $2, $4
-; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: # kill: def $a3 killed $a3 killed $a3_64
+; MIPS64R6O0-NEXT: sll $1, $7, 0
+; MIPS64R6O0-NEXT: # kill: def $a2 killed $a2 killed $a2_64
+; MIPS64R6O0-NEXT: sll $2, $6, 0
+; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64
+; MIPS64R6O0-NEXT: sll $3, $5, 0
+; MIPS64R6O0-NEXT: addu $2, $3, $2
; MIPS64R6O0-NEXT: sync
-; MIPS64R6O0-NEXT: daddiu $4, $zero, -4
-; MIPS64R6O0-NEXT: and $4, $3, $4
-; MIPS64R6O0-NEXT: andi $3, $3, 3
-; MIPS64R6O0-NEXT: xori $3, $3, 2
-; MIPS64R6O0-NEXT: sll $9, $3, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 65535
-; MIPS64R6O0-NEXT: sllv $5, $3, $9
-; MIPS64R6O0-NEXT: nor $7, $zero, $5
-; MIPS64R6O0-NEXT: andi $2, $2, 65535
-; MIPS64R6O0-NEXT: sllv $6, $2, $9
+; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
+; MIPS64R6O0-NEXT: and $3, $4, $3
+; MIPS64R6O0-NEXT: andi $4, $4, 3
+; MIPS64R6O0-NEXT: xori $4, $4, 2
+; MIPS64R6O0-NEXT: sll $4, $4, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 65535
+; MIPS64R6O0-NEXT: sllv $5, $5, $4
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: andi $7, $2, 65535
+; MIPS64R6O0-NEXT: sllv $7, $7, $4
; MIPS64R6O0-NEXT: andi $1, $1, 65535
-; MIPS64R6O0-NEXT: sllv $8, $1, $9
+; MIPS64R6O0-NEXT: sllv $1, $1, $4
; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($4)
-; MIPS64R6O0-NEXT: and $3, $2, $5
-; MIPS64R6O0-NEXT: bnec $3, $6, .LBB15_3
+; MIPS64R6O0-NEXT: ll $9, 0($3)
+; MIPS64R6O0-NEXT: and $10, $9, $5
+; MIPS64R6O0-NEXT: bnec $10, $7, .LBB15_3
; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
-; MIPS64R6O0-NEXT: and $2, $2, $7
-; MIPS64R6O0-NEXT: or $2, $2, $8
-; MIPS64R6O0-NEXT: sc $2, 0($4)
-; MIPS64R6O0-NEXT: beqzc $2, .LBB15_1
+; MIPS64R6O0-NEXT: and $9, $9, $6
+; MIPS64R6O0-NEXT: or $9, $9, $1
+; MIPS64R6O0-NEXT: sc $9, 0($3)
+; MIPS64R6O0-NEXT: beqzc $9, .LBB15_1
; MIPS64R6O0-NEXT: .LBB15_3:
-; MIPS64R6O0-NEXT: srlv $1, $3, $9
-; MIPS64R6O0-NEXT: seh $1, $1
+; MIPS64R6O0-NEXT: srlv $8, $10, $4
+; MIPS64R6O0-NEXT: seh $8, $8
; MIPS64R6O0-NEXT: # %bb.4:
-; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5:
-; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
-; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
-; MIPS64R6O0-NEXT: seh $1, $1
-; MIPS64R6O0-NEXT: xor $1, $2, $1
-; MIPS64R6O0-NEXT: sltiu $3, $1, 1
+; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: seh $2, $1
+; MIPS64R6O0-NEXT: lw $3, 8($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: xor $2, $3, $2
+; MIPS64R6O0-NEXT: sltiu $3, $2, 1
; MIPS64R6O0-NEXT: sync
+; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
@@ -6627,13 +6620,13 @@ define i32 @CheckSync(i32 signext %v) nounwind noinline {
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: sync
-; MIPS32O0-NEXT: lw $3, %got(countsint)($1)
+; MIPS32O0-NEXT: lw $1, %got(countsint)($1)
; MIPS32O0-NEXT: $BB16_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($3)
-; MIPS32O0-NEXT: addu $1, $2, $4
-; MIPS32O0-NEXT: sc $1, 0($3)
-; MIPS32O0-NEXT: beqz $1, $BB16_1
+; MIPS32O0-NEXT: ll $2, 0($1)
+; MIPS32O0-NEXT: addu $3, $2, $4
+; MIPS32O0-NEXT: sc $3, 0($1)
+; MIPS32O0-NEXT: beqz $3, $BB16_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: sync
@@ -6682,13 +6675,13 @@ define i32 @CheckSync(i32 signext %v) nounwind noinline {
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: sync
-; MIPS32R6O0-NEXT: lw $3, %got(countsint)($1)
+; MIPS32R6O0-NEXT: lw $1, %got(countsint)($1)
; MIPS32R6O0-NEXT: $BB16_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($3)
-; MIPS32R6O0-NEXT: addu $1, $2, $4
-; MIPS32R6O0-NEXT: sc $1, 0($3)
-; MIPS32R6O0-NEXT: beqzc $1, $BB16_1
+; MIPS32R6O0-NEXT: ll $2, 0($1)
+; MIPS32R6O0-NEXT: addu $3, $2, $4
+; MIPS32R6O0-NEXT: sc $3, 0($1)
+; MIPS32R6O0-NEXT: beqzc $3, $BB16_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: sync
; MIPS32R6O0-NEXT: jrc $ra
@@ -6774,13 +6767,13 @@ define i32 @CheckSync(i32 signext %v) nounwind noinline {
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: sync
-; MIPS64R6O0-NEXT: ld $3, %got_disp(countsint)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(countsint)($1)
; MIPS64R6O0-NEXT: .LBB16_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($3)
-; MIPS64R6O0-NEXT: addu $1, $2, $4
-; MIPS64R6O0-NEXT: sc $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB16_1
+; MIPS64R6O0-NEXT: ll $2, 0($1)
+; MIPS64R6O0-NEXT: addu $3, $2, $4
+; MIPS64R6O0-NEXT: sc $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB16_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: sync
; MIPS64R6O0-NEXT: jrc $ra
@@ -6925,29 +6918,29 @@ define i32 @zeroreg() nounwind {
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: sync
-; MIPS32O0-NEXT: lw $4, %got(a)($1)
-; MIPS32O0-NEXT: addiu $6, $zero, 0
-; MIPS32O0-NEXT: addiu $2, $zero, 1
-; MIPS32O0-NEXT: move $5, $2
+; MIPS32O0-NEXT: lw $1, %got(a)($1)
+; MIPS32O0-NEXT: addiu $2, $zero, 0
+; MIPS32O0-NEXT: addiu $3, $zero, 1
+; MIPS32O0-NEXT: move $4, $3
; MIPS32O0-NEXT: $BB17_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $1, 0($4)
-; MIPS32O0-NEXT: bne $1, $5, $BB17_3
+; MIPS32O0-NEXT: ll $5, 0($1)
+; MIPS32O0-NEXT: bne $5, $4, $BB17_3
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: # in Loop: Header=BB17_1 Depth=1
-; MIPS32O0-NEXT: move $3, $6
-; MIPS32O0-NEXT: sc $3, 0($4)
-; MIPS32O0-NEXT: beqz $3, $BB17_1
+; MIPS32O0-NEXT: move $6, $2
+; MIPS32O0-NEXT: sc $6, 0($1)
+; MIPS32O0-NEXT: beqz $6, $BB17_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: $BB17_3: # %entry
-; MIPS32O0-NEXT: xor $2, $1, $2
-; MIPS32O0-NEXT: sltiu $2, $2, 1
+; MIPS32O0-NEXT: xor $1, $5, $3
+; MIPS32O0-NEXT: sltiu $1, $1, 1
; MIPS32O0-NEXT: sync
; MIPS32O0-NEXT: addiu $2, $zero, 1
-; MIPS32O0-NEXT: xor $1, $1, $2
-; MIPS32O0-NEXT: sltiu $1, $1, 1
-; MIPS32O0-NEXT: andi $2, $1, 1
+; MIPS32O0-NEXT: xor $2, $5, $2
+; MIPS32O0-NEXT: sltiu $2, $2, 1
+; MIPS32O0-NEXT: andi $2, $2, 1
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
@@ -7008,21 +7001,21 @@ define i32 @zeroreg() nounwind {
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: sync
-; MIPS32R6O0-NEXT: lw $4, %got(a)($1)
-; MIPS32R6O0-NEXT: addiu $6, $zero, 0
-; MIPS32R6O0-NEXT: addiu $2, $zero, 1
-; MIPS32R6O0-NEXT: move $5, $2
+; MIPS32R6O0-NEXT: lw $1, %got(a)($1)
+; MIPS32R6O0-NEXT: addiu $2, $zero, 0
+; MIPS32R6O0-NEXT: addiu $3, $zero, 1
+; MIPS32R6O0-NEXT: move $4, $3
; MIPS32R6O0-NEXT: $BB17_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $1, 0($4)
-; MIPS32R6O0-NEXT: bnec $1, $5, $BB17_3
+; MIPS32R6O0-NEXT: ll $5, 0($1)
+; MIPS32R6O0-NEXT: bnec $5, $4, $BB17_3
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1
-; MIPS32R6O0-NEXT: move $3, $6
-; MIPS32R6O0-NEXT: sc $3, 0($4)
-; MIPS32R6O0-NEXT: beqzc $3, $BB17_1
+; MIPS32R6O0-NEXT: move $6, $2
+; MIPS32R6O0-NEXT: sc $6, 0($1)
+; MIPS32R6O0-NEXT: beqzc $6, $BB17_1
; MIPS32R6O0-NEXT: $BB17_3: # %entry
-; MIPS32R6O0-NEXT: xor $1, $1, $2
+; MIPS32R6O0-NEXT: xor $1, $5, $3
; MIPS32R6O0-NEXT: sltiu $2, $1, 1
; MIPS32R6O0-NEXT: sync
; MIPS32R6O0-NEXT: jrc $ra
@@ -7138,21 +7131,21 @@ define i32 @zeroreg() nounwind {
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg)))
; MIPS64R6O0-NEXT: sync
-; MIPS64R6O0-NEXT: ld $4, %got_disp(a)($1)
-; MIPS64R6O0-NEXT: addiu $6, $zero, 0
-; MIPS64R6O0-NEXT: addiu $2, $zero, 1
-; MIPS64R6O0-NEXT: move $5, $2
+; MIPS64R6O0-NEXT: ld $1, %got_disp(a)($1)
+; MIPS64R6O0-NEXT: addiu $2, $zero, 0
+; MIPS64R6O0-NEXT: addiu $3, $zero, 1
+; MIPS64R6O0-NEXT: move $4, $3
; MIPS64R6O0-NEXT: .LBB17_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $1, 0($4)
-; MIPS64R6O0-NEXT: bnec $1, $5, .LBB17_3
+; MIPS64R6O0-NEXT: ll $5, 0($1)
+; MIPS64R6O0-NEXT: bnec $5, $4, .LBB17_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1
-; MIPS64R6O0-NEXT: move $3, $6
-; MIPS64R6O0-NEXT: sc $3, 0($4)
-; MIPS64R6O0-NEXT: beqzc $3, .LBB17_1
+; MIPS64R6O0-NEXT: move $6, $2
+; MIPS64R6O0-NEXT: sc $6, 0($1)
+; MIPS64R6O0-NEXT: beqzc $6, .LBB17_1
; MIPS64R6O0-NEXT: .LBB17_3: # %entry
-; MIPS64R6O0-NEXT: xor $1, $1, $2
+; MIPS64R6O0-NEXT: xor $1, $5, $3
; MIPS64R6O0-NEXT: sltiu $2, $1, 1
; MIPS64R6O0-NEXT: sync
; MIPS64R6O0-NEXT: jrc $ra
@@ -7323,13 +7316,13 @@ define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind {
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(x)($1)
-; MIPS32O0-NEXT: addiu $3, $1, 1024
+; MIPS32O0-NEXT: addiu $1, $1, 1024
; MIPS32O0-NEXT: $BB18_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32O0-NEXT: ll $2, 0($3)
-; MIPS32O0-NEXT: addu $1, $2, $4
-; MIPS32O0-NEXT: sc $1, 0($3)
-; MIPS32O0-NEXT: beqz $1, $BB18_1
+; MIPS32O0-NEXT: ll $2, 0($1)
+; MIPS32O0-NEXT: addu $3, $2, $4
+; MIPS32O0-NEXT: sc $3, 0($1)
+; MIPS32O0-NEXT: beqz $3, $BB18_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
@@ -7376,13 +7369,13 @@ define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind {
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
-; MIPS32R6O0-NEXT: addiu $3, $1, 1024
+; MIPS32R6O0-NEXT: addiu $1, $1, 1024
; MIPS32R6O0-NEXT: $BB18_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS32R6O0-NEXT: ll $2, 0($3)
-; MIPS32R6O0-NEXT: addu $1, $2, $4
-; MIPS32R6O0-NEXT: sc $1, 0($3)
-; MIPS32R6O0-NEXT: beqzc $1, $BB18_1
+; MIPS32R6O0-NEXT: ll $2, 0($1)
+; MIPS32R6O0-NEXT: addu $3, $2, $4
+; MIPS32R6O0-NEXT: sc $3, 0($1)
+; MIPS32R6O0-NEXT: beqzc $3, $BB18_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
@@ -7465,13 +7458,13 @@ define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind {
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
-; MIPS64R6O0-NEXT: daddiu $3, $1, 1024
+; MIPS64R6O0-NEXT: daddiu $1, $1, 1024
; MIPS64R6O0-NEXT: .LBB18_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $2, 0($3)
-; MIPS64R6O0-NEXT: addu $1, $2, $4
-; MIPS64R6O0-NEXT: sc $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB18_1
+; MIPS64R6O0-NEXT: ll $2, 0($1)
+; MIPS64R6O0-NEXT: addu $3, $2, $4
+; MIPS64R6O0-NEXT: sc $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB18_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
diff --git a/llvm/test/CodeGen/Mips/atomic64.ll b/llvm/test/CodeGen/Mips/atomic64.ll
index d27c9ac42e05..5e59246eff5c 100644
--- a/llvm/test/CodeGen/Mips/atomic64.ll
+++ b/llvm/test/CodeGen/Mips/atomic64.ll
@@ -95,13 +95,13 @@ define i64 @AtomicLoadAdd(i64 signext %incr) nounwind {
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd)))
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB0_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: lld $2, 0($3)
-; MIPS64R6O0-NEXT: daddu $1, $2, $4
-; MIPS64R6O0-NEXT: scd $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1
+; MIPS64R6O0-NEXT: lld $2, 0($1)
+; MIPS64R6O0-NEXT: daddu $3, $2, $4
+; MIPS64R6O0-NEXT: scd $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB0_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -252,13 +252,13 @@ define i64 @AtomicLoadSub(i64 signext %incr) nounwind {
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub)))
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB1_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: lld $2, 0($3)
-; MIPS64R6O0-NEXT: dsubu $1, $2, $4
-; MIPS64R6O0-NEXT: scd $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1
+; MIPS64R6O0-NEXT: lld $2, 0($1)
+; MIPS64R6O0-NEXT: dsubu $3, $2, $4
+; MIPS64R6O0-NEXT: scd $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB1_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -409,13 +409,13 @@ define i64 @AtomicLoadAnd(i64 signext %incr) nounwind {
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd)))
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB2_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: lld $2, 0($3)
-; MIPS64R6O0-NEXT: and $1, $2, $4
-; MIPS64R6O0-NEXT: scd $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1
+; MIPS64R6O0-NEXT: lld $2, 0($1)
+; MIPS64R6O0-NEXT: and $3, $2, $4
+; MIPS64R6O0-NEXT: scd $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB2_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -566,13 +566,13 @@ define i64 @AtomicLoadOr(i64 signext %incr) nounwind {
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr)))
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB3_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: lld $2, 0($3)
-; MIPS64R6O0-NEXT: or $1, $2, $4
-; MIPS64R6O0-NEXT: scd $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1
+; MIPS64R6O0-NEXT: lld $2, 0($1)
+; MIPS64R6O0-NEXT: or $3, $2, $4
+; MIPS64R6O0-NEXT: scd $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB3_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -723,13 +723,13 @@ define i64 @AtomicLoadXor(i64 signext %incr) nounwind {
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor)))
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB4_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: lld $2, 0($3)
-; MIPS64R6O0-NEXT: xor $1, $2, $4
-; MIPS64R6O0-NEXT: scd $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1
+; MIPS64R6O0-NEXT: lld $2, 0($1)
+; MIPS64R6O0-NEXT: xor $3, $2, $4
+; MIPS64R6O0-NEXT: scd $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB4_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -884,14 +884,14 @@ define i64 @AtomicLoadNand(i64 signext %incr) nounwind {
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand)))
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB5_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: lld $2, 0($3)
-; MIPS64R6O0-NEXT: and $1, $2, $4
-; MIPS64R6O0-NEXT: nor $1, $zero, $1
-; MIPS64R6O0-NEXT: scd $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1
+; MIPS64R6O0-NEXT: lld $2, 0($1)
+; MIPS64R6O0-NEXT: and $3, $2, $4
+; MIPS64R6O0-NEXT: nor $3, $zero, $3
+; MIPS64R6O0-NEXT: scd $3, 0($1)
+; MIPS64R6O0-NEXT: beqzc $3, .LBB5_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
@@ -1057,15 +1057,16 @@ define i64 @AtomicSwap64(i64 signext %newval) nounwind {
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64)))
; MIPS64R6O0-NEXT: sd $4, 8($sp)
-; MIPS64R6O0-NEXT: ld $4, 8($sp)
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $2, 8($sp)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB6_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: lld $2, 0($3)
-; MIPS64R6O0-NEXT: move $1, $4
-; MIPS64R6O0-NEXT: scd $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1
+; MIPS64R6O0-NEXT: lld $3, 0($1)
+; MIPS64R6O0-NEXT: move $4, $2
+; MIPS64R6O0-NEXT: scd $4, 0($1)
+; MIPS64R6O0-NEXT: beqzc $4, .LBB6_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
+; MIPS64R6O0-NEXT: move $2, $3
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
@@ -1252,18 +1253,19 @@ define i64 @AtomicCmpSwap64(i64 signext %oldval, i64 signext %newval) nounwind {
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64)))
; MIPS64R6O0-NEXT: sd $5, 8($sp)
-; MIPS64R6O0-NEXT: ld $5, 8($sp)
-; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1)
+; MIPS64R6O0-NEXT: ld $2, 8($sp)
+; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB7_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: lld $2, 0($3)
-; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3
+; MIPS64R6O0-NEXT: lld $3, 0($1)
+; MIPS64R6O0-NEXT: bnec $3, $4, .LBB7_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1
-; MIPS64R6O0-NEXT: move $1, $5
-; MIPS64R6O0-NEXT: scd $1, 0($3)
-; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1
+; MIPS64R6O0-NEXT: move $5, $2
+; MIPS64R6O0-NEXT: scd $5, 0($1)
+; MIPS64R6O0-NEXT: beqzc $5, .LBB7_1
; MIPS64R6O0-NEXT: .LBB7_3: # %entry
+; MIPS64R6O0-NEXT: move $2, $3
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
diff --git a/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll b/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
index ce994c2c18a9..64a62c170281 100644
--- a/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
+++ b/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
@@ -12,18 +12,18 @@ define void @foo(i32 %new, i32 %old) {
; O32-LABEL: foo:
; O32: # %bb.0: # %entry
; O32-NEXT: lui $1, %hi(sym)
-; O32-NEXT: lw $3, %lo(sym)($1)
+; O32-NEXT: lw $1, %lo(sym)($1)
; O32-NEXT: sync
; O32-NEXT: $BB0_1: # %entry
; O32-NEXT: # =>This Inner Loop Header: Depth=1
-; O32-NEXT: ll $1, 0($3)
-; O32-NEXT: bne $1, $4, $BB0_3
+; O32-NEXT: ll $2, 0($1)
+; O32-NEXT: bne $2, $4, $BB0_3
; O32-NEXT: nop
; O32-NEXT: # %bb.2: # %entry
; O32-NEXT: # in Loop: Header=BB0_1 Depth=1
-; O32-NEXT: move $2, $5
-; O32-NEXT: sc $2, 0($3)
-; O32-NEXT: beqz $2, $BB0_1
+; O32-NEXT: move $3, $5
+; O32-NEXT: sc $3, 0($1)
+; O32-NEXT: beqz $3, $BB0_1
; O32-NEXT: nop
; O32-NEXT: $BB0_3: # %entry
; O32-NEXT: sync
@@ -32,23 +32,23 @@ define void @foo(i32 %new, i32 %old) {
;
; N32-LABEL: foo:
; N32: # %bb.0: # %entry
-; N32-NEXT: move $1, $5
-; N32-NEXT: sll $5, $1, 0
-; N32-NEXT: move $1, $4
-; N32-NEXT: sll $4, $1, 0
-; N32-NEXT: lui $1, %hi(sym)
-; N32-NEXT: lw $3, %lo(sym)($1)
+; N32-NEXT: # kill: def $a1 killed $a1 killed $a1_64
+; N32-NEXT: sll $1, $5, 0
+; N32-NEXT: # kill: def $a0 killed $a0 killed $a0_64
+; N32-NEXT: sll $2, $4, 0
+; N32-NEXT: lui $3, %hi(sym)
+; N32-NEXT: lw $3, %lo(sym)($3)
; N32-NEXT: sync
; N32-NEXT: .LBB0_1: # %entry
; N32-NEXT: # =>This Inner Loop Header: Depth=1
-; N32-NEXT: ll $1, 0($3)
-; N32-NEXT: bne $1, $4, .LBB0_3
+; N32-NEXT: ll $4, 0($3)
+; N32-NEXT: bne $4, $2, .LBB0_3
; N32-NEXT: nop
; N32-NEXT: # %bb.2: # %entry
; N32-NEXT: # in Loop: Header=BB0_1 Depth=1
-; N32-NEXT: move $2, $5
-; N32-NEXT: sc $2, 0($3)
-; N32-NEXT: beqz $2, .LBB0_1
+; N32-NEXT: move $5, $1
+; N32-NEXT: sc $5, 0($3)
+; N32-NEXT: beqz $5, .LBB0_1
; N32-NEXT: nop
; N32-NEXT: .LBB0_3: # %entry
; N32-NEXT: sync
@@ -57,27 +57,27 @@ define void @foo(i32 %new, i32 %old) {
;
; N64-LABEL: foo:
; N64: # %bb.0: # %entry
-; N64-NEXT: move $1, $5
-; N64-NEXT: sll $5, $1, 0
-; N64-NEXT: move $1, $4
-; N64-NEXT: sll $4, $1, 0
-; N64-NEXT: lui $1, %highest(sym)
-; N64-NEXT: daddiu $1, $1, %higher(sym)
-; N64-NEXT: dsll $1, $1, 16
-; N64-NEXT: daddiu $1, $1, %hi(sym)
-; N64-NEXT: dsll $1, $1, 16
-; N64-NEXT: ld $3, %lo(sym)($1)
+; N64-NEXT: # kill: def $a1 killed $a1 killed $a1_64
+; N64-NEXT: sll $1, $5, 0
+; N64-NEXT: # kill: def $a0 killed $a0 killed $a0_64
+; N64-NEXT: sll $2, $4, 0
+; N64-NEXT: lui $3, %highest(sym)
+; N64-NEXT: daddiu $3, $3, %higher(sym)
+; N64-NEXT: dsll $3, $3, 16
+; N64-NEXT: daddiu $3, $3, %hi(sym)
+; N64-NEXT: dsll $3, $3, 16
+; N64-NEXT: ld $3, %lo(sym)($3)
; N64-NEXT: sync
; N64-NEXT: .LBB0_1: # %entry
; N64-NEXT: # =>This Inner Loop Header: Depth=1
-; N64-NEXT: ll $1, 0($3)
-; N64-NEXT: bne $1, $4, .LBB0_3
+; N64-NEXT: ll $4, 0($3)
+; N64-NEXT: bne $4, $2, .LBB0_3
; N64-NEXT: nop
; N64-NEXT: # %bb.2: # %entry
; N64-NEXT: # in Loop: Header=BB0_1 Depth=1
-; N64-NEXT: move $2, $5
-; N64-NEXT: sc $2, 0($3)
-; N64-NEXT: beqz $2, .LBB0_1
+; N64-NEXT: move $5, $1
+; N64-NEXT: sc $5, 0($3)
+; N64-NEXT: beqz $5, .LBB0_1
; N64-NEXT: nop
; N64-NEXT: .LBB0_3: # %entry
; N64-NEXT: sync
diff --git a/llvm/test/CodeGen/Mips/copy-fp64.ll b/llvm/test/CodeGen/Mips/copy-fp64.ll
index eb096e3787c2..439c788eb9be 100644
--- a/llvm/test/CodeGen/Mips/copy-fp64.ll
+++ b/llvm/test/CodeGen/Mips/copy-fp64.ll
@@ -11,8 +11,8 @@ define double @foo(double %self) {
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $d12_64, $t9, $v0
; CHECK: renamable $at = ADDu killed $v0, killed $t9
- ; CHECK: renamable $d6_64 = COPY killed $d12_64
; CHECK: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; CHECK: $d6_64 = COPY killed renamable $d12_64
; CHECK: renamable $t9 = LW killed renamable $at, target-flags(mips-got) @bar
; CHECK: dead $ra = JALR killed $t9, csr_o32_fp64, target-flags(mips-jalr) <mcsymbol bar>, implicit-def dead $ra, implicit killed $d6_64, implicit-def $d0_64
; CHECK: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
diff --git a/llvm/test/CodeGen/Mips/implicit-sret.ll b/llvm/test/CodeGen/Mips/implicit-sret.ll
index 9c4d28fa0e47..b9f6568e40c9 100644
--- a/llvm/test/CodeGen/Mips/implicit-sret.ll
+++ b/llvm/test/CodeGen/Mips/implicit-sret.ll
@@ -20,8 +20,9 @@ define internal void @test() unnamed_addr nounwind {
; CHECK-NEXT: ld $5, 16($sp)
; CHECK-NEXT: ld $7, 32($sp)
; CHECK-NEXT: lw $1, 8($sp)
-; CHECK-NEXT: # implicit-def: $a0_64
-; CHECK-NEXT: move $4, $1
+; CHECK-NEXT: # implicit-def: $v0_64
+; CHECK-NEXT: move $2, $1
+; CHECK-NEXT: move $4, $2
; CHECK-NEXT: jal use_sret
; CHECK-NEXT: nop
; CHECK-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload
@@ -40,15 +41,15 @@ start:
define internal { i32, i128, i64 } @implicit_sret_impl() unnamed_addr nounwind {
; CHECK-LABEL: implicit_sret_impl:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $at_64 killed $a0_64
-; CHECK-NEXT: daddiu $1, $zero, 20
-; CHECK-NEXT: sd $1, 16($4)
-; CHECK-NEXT: daddiu $1, $zero, 0
+; CHECK-NEXT: move $1, $4
+; CHECK-NEXT: daddiu $2, $zero, 20
+; CHECK-NEXT: sd $2, 16($4)
+; CHECK-NEXT: daddiu $2, $zero, 0
; CHECK-NEXT: sd $zero, 8($4)
-; CHECK-NEXT: daddiu $1, $zero, 30
-; CHECK-NEXT: sd $1, 24($4)
-; CHECK-NEXT: addiu $1, $zero, 10
-; CHECK-NEXT: sw $1, 0($4)
+; CHECK-NEXT: daddiu $3, $zero, 30
+; CHECK-NEXT: sd $3, 24($4)
+; CHECK-NEXT: addiu $3, $zero, 10
+; CHECK-NEXT: sw $3, 0($4)
; CHECK-NEXT: jr $ra
; CHECK-NEXT: nop
ret { i32, i128, i64 } { i32 10, i128 20, i64 30 }
@@ -69,10 +70,12 @@ define internal void @test2() unnamed_addr nounwind {
; CHECK-NEXT: lw $3, 4($sp)
; CHECK-NEXT: # implicit-def: $a0_64
; CHECK-NEXT: move $4, $3
-; CHECK-NEXT: # implicit-def: $a1_64
-; CHECK-NEXT: move $5, $2
-; CHECK-NEXT: # implicit-def: $a2_64
-; CHECK-NEXT: move $6, $1
+; CHECK-NEXT: # implicit-def: $v1_64
+; CHECK-NEXT: move $3, $2
+; CHECK-NEXT: # implicit-def: $v0_64
+; CHECK-NEXT: move $2, $1
+; CHECK-NEXT: move $5, $3
+; CHECK-NEXT: move $6, $2
; CHECK-NEXT: jal use_sret2
; CHECK-NEXT: nop
; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
@@ -92,19 +95,19 @@ start:
define internal { i32, i32, i32, i32, i32, i32 } @implicit_sret_impl2() unnamed_addr nounwind {
; CHECK-LABEL: implicit_sret_impl2:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $at_64 killed $a0_64
-; CHECK-NEXT: addiu $1, $zero, 6
-; CHECK-NEXT: sw $1, 20($4)
-; CHECK-NEXT: addiu $1, $zero, 5
-; CHECK-NEXT: sw $1, 16($4)
-; CHECK-NEXT: addiu $1, $zero, 4
-; CHECK-NEXT: sw $1, 12($4)
-; CHECK-NEXT: addiu $1, $zero, 3
-; CHECK-NEXT: sw $1, 8($4)
-; CHECK-NEXT: addiu $1, $zero, 2
-; CHECK-NEXT: sw $1, 4($4)
-; CHECK-NEXT: addiu $1, $zero, 1
-; CHECK-NEXT: sw $1, 0($4)
+; CHECK-NEXT: move $1, $4
+; CHECK-NEXT: addiu $2, $zero, 6
+; CHECK-NEXT: sw $2, 20($4)
+; CHECK-NEXT: addiu $2, $zero, 5
+; CHECK-NEXT: sw $2, 16($4)
+; CHECK-NEXT: addiu $2, $zero, 4
+; CHECK-NEXT: sw $2, 12($4)
+; CHECK-NEXT: addiu $2, $zero, 3
+; CHECK-NEXT: sw $2, 8($4)
+; CHECK-NEXT: addiu $2, $zero, 2
+; CHECK-NEXT: sw $2, 4($4)
+; CHECK-NEXT: addiu $2, $zero, 1
+; CHECK-NEXT: sw $2, 0($4)
; CHECK-NEXT: jr $ra
; CHECK-NEXT: nop
ret { i32, i32, i32, i32, i32, i32 } { i32 1, i32 2, i32 3, i32 4, i32 5, i32 6 }
diff --git a/llvm/test/CodeGen/Mips/micromips-eva.mir b/llvm/test/CodeGen/Mips/micromips-eva.mir
index c4d05cf6985e..fd30529f7097 100644
--- a/llvm/test/CodeGen/Mips/micromips-eva.mir
+++ b/llvm/test/CodeGen/Mips/micromips-eva.mir
@@ -196,19 +196,19 @@ body: |
...
-# CHECK: 60 22 60 05 lbue $1, 5($2)
-# CHECK: 60 22 68 05 lbe $1, 5($2)
-# CHECK: 60 22 a8 03 sbe $1, 3($2)
+# CHECK: 60 41 60 05 lbue $2, 5($1)
+# CHECK: 60 41 68 05 lbe $2, 5($1)
+# CHECK: 60 41 a8 03 sbe $2, 3($1)
-# CHECK: 60 22 62 0a lhue $1, 10($2)
-# CHECK: 60 22 6a 0a lhe $1, 10($2)
-# CHECK: 60 22 aa 06 she $1, 6($2)
+# CHECK: 60 41 62 0a lhue $2, 10($1)
+# CHECK: 60 41 6a 0a lhe $2, 10($1)
+# CHECK: 60 41 aa 06 she $2, 6($1)
-# CHECK: 60 22 6e 14 lwe $1, 20($2)
-# CHECK: 60 22 ae 0c swe $1, 12($2)
+# CHECK: 60 41 6e 14 lwe $2, 20($1)
+# CHECK: 60 41 ae 0c swe $2, 12($1)
-# CHECK: 60 22 6c 00 lle $1, 0($2)
-# CHECK: 60 22 ac 00 sce $1, 0($2)
+# CHECK: 60 41 6c 00 lle $2, 0($1)
+# CHECK: 60 81 ac 00 sce $4, 0($1)
# CHECK: 60 41 a6 05 cachee 2, 5($1)
# CHECK: 60 41 a4 05 prefe 2, 5($1)
diff --git a/llvm/test/CodeGen/Mips/msa/ldr_str.ll b/llvm/test/CodeGen/Mips/msa/ldr_str.ll
index 51c8bcd3fdbc..8bebd9481625 100644
--- a/llvm/test/CodeGen/Mips/msa/ldr_str.ll
+++ b/llvm/test/CodeGen/Mips/msa/ldr_str.ll
@@ -11,47 +11,47 @@
define void @llvm_mips_ldr_d_test(<2 x i64>* %val, i8* %ptr) nounwind {
; MIPS32R5-EB-LABEL: llvm_mips_ldr_d_test:
; MIPS32R5-EB: # %bb.0: # %entry
-; MIPS32R5-EB-NEXT: # implicit-def: $v0
-; MIPS32R5-EB-NEXT: lwr $2, 23($5)
-; MIPS32R5-EB-NEXT: lwl $2, 20($5)
; MIPS32R5-EB-NEXT: # implicit-def: $at
-; MIPS32R5-EB-NEXT: lwr $1, 19($5)
-; MIPS32R5-EB-NEXT: lwl $1, 16($5)
-; MIPS32R5-EB-NEXT: fill.w $w0, $2
-; MIPS32R5-EB-NEXT: insert.w $w0[1], $1
+; MIPS32R5-EB-NEXT: lwr $1, 23($5)
+; MIPS32R5-EB-NEXT: lwl $1, 20($5)
+; MIPS32R5-EB-NEXT: # implicit-def: $v0
+; MIPS32R5-EB-NEXT: lwr $2, 19($5)
+; MIPS32R5-EB-NEXT: lwl $2, 16($5)
+; MIPS32R5-EB-NEXT: fill.w $w0, $1
+; MIPS32R5-EB-NEXT: insert.w $w0[1], $2
; MIPS32R5-EB-NEXT: st.d $w0, 0($4)
; MIPS32R5-EB-NEXT: jr $ra
; MIPS32R5-EB-NEXT: nop
;
; MIPS32R5-EL-LABEL: llvm_mips_ldr_d_test:
; MIPS32R5-EL: # %bb.0: # %entry
-; MIPS32R5-EL-NEXT: # implicit-def: $v0
-; MIPS32R5-EL-NEXT: lwr $2, 16($5)
-; MIPS32R5-EL-NEXT: lwl $2, 19($5)
; MIPS32R5-EL-NEXT: # implicit-def: $at
-; MIPS32R5-EL-NEXT: lwr $1, 20($5)
-; MIPS32R5-EL-NEXT: lwl $1, 23($5)
-; MIPS32R5-EL-NEXT: fill.w $w0, $2
-; MIPS32R5-EL-NEXT: insert.w $w0[1], $1
+; MIPS32R5-EL-NEXT: lwr $1, 16($5)
+; MIPS32R5-EL-NEXT: lwl $1, 19($5)
+; MIPS32R5-EL-NEXT: # implicit-def: $v0
+; MIPS32R5-EL-NEXT: lwr $2, 20($5)
+; MIPS32R5-EL-NEXT: lwl $2, 23($5)
+; MIPS32R5-EL-NEXT: fill.w $w0, $1
+; MIPS32R5-EL-NEXT: insert.w $w0[1], $2
; MIPS32R5-EL-NEXT: st.d $w0, 0($4)
; MIPS32R5-EL-NEXT: jr $ra
; MIPS32R5-EL-NEXT: nop
;
; MIPS32R6-EB-LABEL: llvm_mips_ldr_d_test:
; MIPS32R6-EB: # %bb.0: # %entry
-; MIPS32R6-EB-NEXT: lw $2, 20($5)
-; MIPS32R6-EB-NEXT: lw $1, 16($5)
-; MIPS32R6-EB-NEXT: fill.w $w0, $2
-; MIPS32R6-EB-NEXT: insert.w $w0[1], $1
+; MIPS32R6-EB-NEXT: lw $1, 20($5)
+; MIPS32R6-EB-NEXT: lw $2, 16($5)
+; MIPS32R6-EB-NEXT: fill.w $w0, $1
+; MIPS32R6-EB-NEXT: insert.w $w0[1], $2
; MIPS32R6-EB-NEXT: st.d $w0, 0($4)
; MIPS32R6-EB-NEXT: jrc $ra
;
; MIPS32R6-EL-LABEL: llvm_mips_ldr_d_test:
; MIPS32R6-EL: # %bb.0: # %entry
-; MIPS32R6-EL-NEXT: lw $2, 16($5)
-; MIPS32R6-EL-NEXT: lw $1, 20($5)
-; MIPS32R6-EL-NEXT: fill.w $w0, $2
-; MIPS32R6-EL-NEXT: insert.w $w0[1], $1
+; MIPS32R6-EL-NEXT: lw $1, 16($5)
+; MIPS32R6-EL-NEXT: lw $2, 20($5)
+; MIPS32R6-EL-NEXT: fill.w $w0, $1
+; MIPS32R6-EL-NEXT: insert.w $w0[1], $2
; MIPS32R6-EL-NEXT: st.d $w0, 0($4)
; MIPS32R6-EL-NEXT: jrc $ra
;
@@ -122,43 +122,43 @@ define void @llvm_mips_str_d_test(<2 x i64>* %val, i8* %ptr) nounwind {
; MIPS32R5-EB-LABEL: llvm_mips_str_d_test:
; MIPS32R5-EB: # %bb.0: # %entry
; MIPS32R5-EB-NEXT: ld.d $w0, 0($4)
-; MIPS32R5-EB-NEXT: copy_s.w $2, $w0[0]
-; MIPS32R5-EB-NEXT: copy_s.w $1, $w0[1]
-; MIPS32R5-EB-NEXT: swr $2, 19($5)
-; MIPS32R5-EB-NEXT: swl $2, 16($5)
-; MIPS32R5-EB-NEXT: swr $1, 23($5)
-; MIPS32R5-EB-NEXT: swl $1, 20($5)
+; MIPS32R5-EB-NEXT: copy_s.w $1, $w0[0]
+; MIPS32R5-EB-NEXT: copy_s.w $2, $w0[1]
+; MIPS32R5-EB-NEXT: swr $1, 19($5)
+; MIPS32R5-EB-NEXT: swl $1, 16($5)
+; MIPS32R5-EB-NEXT: swr $2, 23($5)
+; MIPS32R5-EB-NEXT: swl $2, 20($5)
; MIPS32R5-EB-NEXT: jr $ra
; MIPS32R5-EB-NEXT: nop
;
; MIPS32R5-EL-LABEL: llvm_mips_str_d_test:
; MIPS32R5-EL: # %bb.0: # %entry
; MIPS32R5-EL-NEXT: ld.d $w0, 0($4)
-; MIPS32R5-EL-NEXT: copy_s.w $2, $w0[0]
-; MIPS32R5-EL-NEXT: copy_s.w $1, $w0[1]
-; MIPS32R5-EL-NEXT: swr $2, 16($5)
-; MIPS32R5-EL-NEXT: swl $2, 19($5)
-; MIPS32R5-EL-NEXT: swr $1, 20($5)
-; MIPS32R5-EL-NEXT: swl $1, 23($5)
+; MIPS32R5-EL-NEXT: copy_s.w $1, $w0[0]
+; MIPS32R5-EL-NEXT: copy_s.w $2, $w0[1]
+; MIPS32R5-EL-NEXT: swr $1, 16($5)
+; MIPS32R5-EL-NEXT: swl $1, 19($5)
+; MIPS32R5-EL-NEXT: swr $2, 20($5)
+; MIPS32R5-EL-NEXT: swl $2, 23($5)
; MIPS32R5-EL-NEXT: jr $ra
; MIPS32R5-EL-NEXT: nop
;
; MIPS32R6-EB-LABEL: llvm_mips_str_d_test:
; MIPS32R6-EB: # %bb.0: # %entry
; MIPS32R6-EB-NEXT: ld.d $w0, 0($4)
-; MIPS32R6-EB-NEXT: copy_s.w $2, $w0[0]
-; MIPS32R6-EB-NEXT: copy_s.w $1, $w0[1]
-; MIPS32R6-EB-NEXT: sw $2, 20($5)
-; MIPS32R6-EB-NEXT: sw $1, 16($5)
+; MIPS32R6-EB-NEXT: copy_s.w $1, $w0[0]
+; MIPS32R6-EB-NEXT: copy_s.w $2, $w0[1]
+; MIPS32R6-EB-NEXT: sw $1, 20($5)
+; MIPS32R6-EB-NEXT: sw $2, 16($5)
; MIPS32R6-EB-NEXT: jrc $ra
;
; MIPS32R6-EL-LABEL: llvm_mips_str_d_test:
; MIPS32R6-EL: # %bb.0: # %entry
; MIPS32R6-EL-NEXT: ld.d $w0, 0($4)
-; MIPS32R6-EL-NEXT: copy_s.w $2, $w0[0]
-; MIPS32R6-EL-NEXT: copy_s.w $1, $w0[1]
-; MIPS32R6-EL-NEXT: sw $2, 16($5)
-; MIPS32R6-EL-NEXT: sw $1, 20($5)
+; MIPS32R6-EL-NEXT: copy_s.w $1, $w0[0]
+; MIPS32R6-EL-NEXT: copy_s.w $2, $w0[1]
+; MIPS32R6-EL-NEXT: sw $1, 16($5)
+; MIPS32R6-EL-NEXT: sw $2, 20($5)
; MIPS32R6-EL-NEXT: jrc $ra
;
; MIPS64R6-LABEL: llvm_mips_str_d_test:
diff --git a/llvm/test/CodeGen/PowerPC/addegluecrash.ll b/llvm/test/CodeGen/PowerPC/addegluecrash.ll
index 2338ca9ded04..c38f377869f8 100644
--- a/llvm/test/CodeGen/PowerPC/addegluecrash.ll
+++ b/llvm/test/CodeGen/PowerPC/addegluecrash.ll
@@ -6,30 +6,27 @@ target triple = "powerpc64le-unknown-linux-gnu"
define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* nocapture readonly %b) {
; CHECK-LABEL: bn_mul_comba8:
; CHECK: # %bb.0:
-; CHECK-NEXT: std 4, -8(1) # 8-byte Folded Spill
-; CHECK-NEXT: mr 4, 3
-; CHECK-NEXT: ld 3, -8(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 9, 0(3)
-; CHECK-NEXT: ld 8, 0(5)
-; CHECK-NEXT: mulhdu 7, 8, 9
-; CHECK-NEXT: ld 3, 8(3)
-; CHECK-NEXT: mulld 6, 3, 9
-; CHECK-NEXT: mulhdu 3, 3, 9
-; CHECK-NEXT: addc 6, 6, 7
-; CHECK-NEXT: addze 3, 3
+; CHECK-NEXT: ld 6, 0(4)
+; CHECK-NEXT: ld 7, 0(5)
+; CHECK-NEXT: mulhdu 8, 7, 6
+; CHECK-NEXT: ld 4, 8(4)
+; CHECK-NEXT: mulld 9, 4, 6
+; CHECK-NEXT: mulhdu 4, 4, 6
+; CHECK-NEXT: addc 6, 9, 8
+; CHECK-NEXT: addze 4, 4
; CHECK-NEXT: ld 5, 8(5)
-; CHECK-NEXT: mulld 7, 5, 8
-; CHECK-NEXT: mulhdu 5, 5, 8
-; CHECK-NEXT: addc 6, 6, 7
+; CHECK-NEXT: mulld 8, 5, 7
+; CHECK-NEXT: mulhdu 5, 5, 7
+; CHECK-NEXT: addc 6, 6, 8
; CHECK-NEXT: addze 5, 5
-; CHECK-NEXT: add 3, 5, 3
-; CHECK-NEXT: cmpld 7, 3, 5
-; CHECK-NEXT: mfocrf 3, 1
-; CHECK-NEXT: rlwinm 5, 3, 29, 31, 31
-; CHECK-NEXT: # implicit-def: $x3
-; CHECK-NEXT: mr 3, 5
-; CHECK-NEXT: clrldi 3, 3, 32
-; CHECK-NEXT: std 3, 0(4)
+; CHECK-NEXT: add 4, 5, 4
+; CHECK-NEXT: cmpld 7, 4, 5
+; CHECK-NEXT: mfocrf 4, 1
+; CHECK-NEXT: rlwinm 4, 4, 29, 31, 31
+; CHECK-NEXT: # implicit-def: $x5
+; CHECK-NEXT: mr 5, 4
+; CHECK-NEXT: clrldi 4, 5, 32
+; CHECK-NEXT: std 4, 0(3)
; CHECK-NEXT: blr
%1 = load i64, i64* %a, align 8
%conv = zext i64 %1 to i128
diff --git a/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll b/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll
index d55d78866547..95dd58f513cc 100644
--- a/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll
+++ b/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll
@@ -9,7 +9,7 @@ entry:
lnext:
%elementArray = load i32*, i32** %elementArrayPtr, align 8
-; CHECK: lwz [[LDREG:[0-9]+]], 140(1) # 4-byte Folded Reload
+; CHECK: lwz [[LDREG:[0-9]+]], 124(1) # 4-byte Folded Reload
; CHECK: # implicit-def: $x[[TEMPREG:[0-9]+]]
%element = load i32, i32* %elementArray, align 4
; CHECK: mr [[TEMPREG]], [[LDREG]]
diff --git a/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py b/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py
index 85776b760948..e04491bff2fb 100644
--- a/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py
+++ b/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py
@@ -28,41 +28,41 @@
print("}")
# 32-bit assembly check
-# ASM32: lwz 4, L..C0(2)
-# ASM32: lwz 4, L..C1(2)
+# ASM32: lwz 3, L..C0(2)
+# ASM32: lwz 3, L..C1(2)
-# ASM32: lwz 4, L..C8191(2)
-# ASM32: lwz 4, L..C8192-65536(2)
-# ASM32: lwz 4, L..C8193-65536(2)
+# ASM32: lwz 3, L..C8191(2)
+# ASM32: lwz 3, L..C8192-65536(2)
+# ASM32: lwz 3, L..C8193-65536(2)
-# ASM32: lwz 4, L..C12288-65536(2)
-# ASM32: lwz 4, L..C12289-65536(2)
+# ASM32: lwz 3, L..C12288-65536(2)
+# ASM32: lwz 3, L..C12289-65536(2)
# 64-bit assembly check
-# ASM64: ld 4, L..C0(2)
-# ASM64: ld 4, L..C1(2)
+# ASM64: ld 3, L..C0(2)
+# ASM64: ld 3, L..C1(2)
-# ASM64: ld 4, L..C4095(2)
-# ASM64: ld 4, L..C4096-65536(2)
-# ASM64: ld 4, L..C4097-65536(2)
+# ASM64: ld 3, L..C4095(2)
+# ASM64: ld 3, L..C4096-65536(2)
+# ASM64: ld 3, L..C4097-65536(2)
-# ASM64: ld 4, L..C12287-65536(2)
-# ASM64: ld 4, L..C12288-131072(2)
-# ASM64: ld 4, L..C12289-131072(2)
+# ASM64: ld 3, L..C12287-65536(2)
+# ASM64: ld 3, L..C12288-131072(2)
+# ASM64: ld 3, L..C12289-131072(2)
-# DIS32: 0: 80 82 00 00 lwz 4, 0(2)
+# DIS32: 0: 80 62 00 00 lwz 3, 0(2)
# DIS32: 00000002: R_TOC (idx: 24590) a0[TC]
-# DIS32: c: 80 82 00 04 lwz 4, 4(2)
+# DIS32: c: 80 62 00 04 lwz 3, 4(2)
# DIS32: 0000000e: R_TOC (idx: 24592) a1[TC]
-# DIS32: fffc: 80 82 7f fc lwz 4, 32764(2)
+# DIS32: fffc: 80 62 7f fc lwz 3, 32764(2)
# DIS32: 0000fffe: R_TOC (idx: 40972) a8191[TC]
-# DIS32: 10004: 80 82 80 00 lwz 4, -32768(2)
+# DIS32: 10004: 80 62 80 00 lwz 3, -32768(2)
# DIS32: 00010006: R_TOC (idx: 40974) a8192[TC]
-# DIS32: 1000c: 80 82 80 04 lwz 4, -32764(2)
+# DIS32: 1000c: 80 62 80 04 lwz 3, -32764(2)
# DIS32: 0001000e: R_TOC (idx: 40976) a8193[TC]
-# DIS32: 18004: 80 82 c0 00 lwz 4, -16384(2)
+# DIS32: 18004: 80 62 c0 00 lwz 3, -16384(2)
# DIS32: 00018006: R_TOC (idx: 49166) a12288[TC]
-# DIS32: 1800c: 80 82 c0 04 lwz 4, -16380(2)
+# DIS32: 1800c: 80 62 c0 04 lwz 3, -16380(2)
# DIS32: 0001800e: R_TOC (idx: 49168) a12289[TC]
diff --git a/llvm/test/CodeGen/PowerPC/anon_aggr.ll b/llvm/test/CodeGen/PowerPC/anon_aggr.ll
index cc07c4843655..17f4ed46697d 100644
--- a/llvm/test/CodeGen/PowerPC/anon_aggr.ll
+++ b/llvm/test/CodeGen/PowerPC/anon_aggr.ll
@@ -19,9 +19,9 @@ unequal:
}
; CHECK-LABEL: func1:
+; CHECK: cmpld {{([0-9]+,)?}}4, 5
; CHECK-DAG: std 3, -[[OFFSET1:[0-9]+]]
; CHECK-DAG: std 5, -[[OFFSET2:[0-9]+]]
-; CHECK: cmpld {{([0-9]+,)?}}4, 5
; CHECK: ld 3, -[[OFFSET1]](1)
; CHECK: ld 3, -[[OFFSET2]](1)
@@ -38,13 +38,13 @@ unequal:
ret i8* %array2_ptr
}
; CHECK-LABEL: func2:
-; CHECK-DAG: cmpld {{([0-9]+,)?}}4, 3
+; CHECK-DAG: cmpld {{([0-9]+,)?}}4, 5
; CHECK-DAG: std 6, 72(1)
; CHECK-DAG: std 5, 64(1)
-; CHECK-DAG: std 3, -[[OFFSET1:[0-9]+]]
+; CHECK-DAG: std 5, -[[OFFSET1:[0-9]+]]
; CHECK-DAG: std 3, -[[OFFSET2:[0-9]+]]
-; CHECK: ld 3, -[[OFFSET1]](1)
; CHECK: ld 3, -[[OFFSET2]](1)
+; CHECK: ld 3, -[[OFFSET1]](1)
define i8* @func3({ i64, i8* }* byval %array1, %tarray* byval %array2) {
entry:
@@ -85,9 +85,9 @@ unequal:
; CHECK-LABEL: func4:
; CHECK-DAG: ld [[REG2:[0-9]+]], 120(1)
; CHECK-DAG: ld [[REG3:[0-9]+]], 136(1)
-; CHECK-DAG: std [[REG2]], -[[OFFSET1:[0-9]+]](1)
+; CHECK-DAG: cmpld {{([0-9]+,)?}}[[REG2]], [[REG3]]
+; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]](1)
; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1)
-; CHECK: cmpld {{([0-9]+,)?}}[[REG2]], [[REG3]]
; CHECK: ld 3, -[[OFFSET1]](1)
; CHECK: ld 3, -[[OFFSET2]](1)
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
index 17617e90a01f..ca25afa458aa 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -248,7 +248,8 @@ define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %
;
; CHECK-O0-LABEL: vec_xl_zext:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: lxvrbx v2, r4, r3
+; CHECK-O0-NEXT: lxvrbx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
@@ -268,7 +269,8 @@ define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture rea
; CHECK-O0-LABEL: vec_xl_zext_short:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 1
-; CHECK-O0-NEXT: lxvrhx v2, r4, r3
+; CHECK-O0-NEXT: lxvrhx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
@@ -288,7 +290,8 @@ define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture read
; CHECK-O0-LABEL: vec_xl_zext_word:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 2
-; CHECK-O0-NEXT: lxvrwx v2, r4, r3
+; CHECK-O0-NEXT: lxvrwx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
@@ -308,7 +311,8 @@ define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readon
; CHECK-O0-LABEL: vec_xl_zext_dw:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 3
-; CHECK-O0-NEXT: lxvrdx v2, r4, r3
+; CHECK-O0-NEXT: lxvrdx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
@@ -330,9 +334,9 @@ define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) {
; CHECK-O0-LABEL: vec_xl_sext_b:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: lbzx r3, r4, r3
-; CHECK-O0-NEXT: extsb r4, r3
-; CHECK-O0-NEXT: sradi r3, r4, 63
-; CHECK-O0-NEXT: mtvsrdd v2, r3, r4
+; CHECK-O0-NEXT: extsb r3, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %p, i64 %offset
@@ -354,9 +358,9 @@ define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) {
; CHECK-O0-LABEL: vec_xl_sext_h:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 1
-; CHECK-O0-NEXT: lhax r4, r4, r3
-; CHECK-O0-NEXT: sradi r3, r4, 63
-; CHECK-O0-NEXT: mtvsrdd v2, r3, r4
+; CHECK-O0-NEXT: lhax r3, r4, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i16, i16* %p, i64 %offset
@@ -378,9 +382,9 @@ define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) {
; CHECK-O0-LABEL: vec_xl_sext_w:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 2
-; CHECK-O0-NEXT: lwax r4, r4, r3
-; CHECK-O0-NEXT: sradi r3, r4, 63
-; CHECK-O0-NEXT: mtvsrdd v2, r3, r4
+; CHECK-O0-NEXT: lwax r3, r4, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %p, i64 %offset
@@ -402,9 +406,9 @@ define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) {
; CHECK-O0-LABEL: vec_xl_sext_d:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 3
-; CHECK-O0-NEXT: ldx r4, r4, r3
-; CHECK-O0-NEXT: sradi r3, r4, 63
-; CHECK-O0-NEXT: mtvsrdd v2, r3, r4
+; CHECK-O0-NEXT: ldx r3, r4, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i64, i64* %p, i64 %offset
diff --git a/llvm/test/CodeGen/PowerPC/elf-common.ll b/llvm/test/CodeGen/PowerPC/elf-common.ll
index 722b4803ca3a..cc73d9b58b54 100644
--- a/llvm/test/CodeGen/PowerPC/elf-common.ll
+++ b/llvm/test/CodeGen/PowerPC/elf-common.ll
@@ -6,7 +6,7 @@
; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck -check-prefix=PIC %s
-; Test correct code generation for static and pic for loading and storing a common symbol
+; Test correct code generation for static and pic for loading and storing a common symbol
@comm_glob = common global i32 0, align 4
@@ -14,11 +14,11 @@ define signext i32 @test_comm() nounwind {
; NOOPT-LABEL: test_comm:
; NOOPT: # %bb.0: # %entry
; NOOPT-NEXT: addis 3, 2, comm_glob@toc@ha
-; NOOPT-NEXT: addi 5, 3, comm_glob@toc@l
-; NOOPT-NEXT: lwz 3, 0(5)
-; NOOPT-NEXT: addi 4, 3, 1
-; NOOPT-NEXT: stw 4, 0(5)
-; NOOPT-NEXT: extsw 3, 3
+; NOOPT-NEXT: addi 3, 3, comm_glob@toc@l
+; NOOPT-NEXT: lwz 4, 0(3)
+; NOOPT-NEXT: addi 5, 4, 1
+; NOOPT-NEXT: stw 5, 0(3)
+; NOOPT-NEXT: extsw 3, 4
; NOOPT-NEXT: blr
;
; STATIC-LABEL: test_comm:
diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll b/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
index 484162d089e5..3758f8db10ce 100644
--- a/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
@@ -22,12 +22,12 @@ define internal void @loadFP(double* %d) #0 {
; CHECK-NEXT: paddi r3, 0, .L.str@PCREL, 1
; CHECK-NEXT: bl printf@notoc
; CHECK-NEXT: ld r4, 104(r1)
-; CHECK-NEXT: lis r3, 16403
-; CHECK-NEXT: ori r3, r3, 62914
-; CHECK-NEXT: sldi r3, r3, 32
-; CHECK-NEXT: oris r3, r3, 36700
-; CHECK-NEXT: ori r3, r3, 10486
-; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: lis r5, 16403
+; CHECK-NEXT: ori r5, r5, 62914
+; CHECK-NEXT: sldi r5, r5, 32
+; CHECK-NEXT: oris r5, r5, 36700
+; CHECK-NEXT: ori r5, r5, 10486
+; CHECK-NEXT: std r5, 0(r4)
; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
@@ -50,14 +50,16 @@ define internal void @loadGV() #0 {
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
-; CHECK-NEXT: stdu r1, -96(r1)
+; CHECK-NEXT: stdu r1, -112(r1)
; CHECK-NEXT: paddi r3, 0, .L.str.1@PCREL, 1
; CHECK-NEXT: bl printf@notoc
-; CHECK-NEXT: pld r3, stdout@got@pcrel(0), 1
-; CHECK-NEXT: ld r4, 0(r3)
-; CHECK-NEXT: li r3, 97
+; CHECK-NEXT: pld r4, stdout@got@pcrel(0), 1
+; CHECK-NEXT: ld r4, 0(r4)
+; CHECK-NEXT: li r5, 97
+; CHECK-NEXT: std r3, 104(r1) # 8-byte Folded Spill
+; CHECK-NEXT: mr r3, r5
; CHECK-NEXT: bl _IO_putc@notoc
-; CHECK-NEXT: addi r1, r1, 96
+; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll b/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
index b46b1409da7d..47c05b56c2fa 100644
--- a/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
@@ -29,7 +29,8 @@ entry:
define float @f_i128_fi_nsz(float %v) #0 {
; CHECK-LABEL: f_i128_fi_nsz:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xsrdpiz 1, 1
+; CHECK-NEXT: xsrdpiz 0, 1
+; CHECK-NEXT: fmr 1, 0
; CHECK-NEXT: blr
entry:
%a = fptosi float %v to i128
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll
index 6b6703f0cbba..e6bc0f4bd769 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll
@@ -12,17 +12,17 @@ define i32 @une_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
; CHECK-NEXT: mfocrf r4, 1
; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31
; CHECK-NEXT: xori r4, r4, 1
-; CHECK-NEXT: and r4, r3, r4
+; CHECK-NEXT: and r3, r3, r4
; CHECK-NEXT: xscmpudp cr7, f1, f3
-; CHECK-NEXT: mfocrf r3, 1
-; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31
-; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: mfocrf r4, 1
+; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31
+; CHECK-NEXT: xori r4, r4, 1
; CHECK-NEXT: xscmpudp cr7, f1, f3
; CHECK-NEXT: mfocrf r5, 1
; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31
; CHECK-NEXT: xori r5, r5, 1
-; CHECK-NEXT: and r3, r3, r5
-; CHECK-NEXT: or r3, r3, r4
+; CHECK-NEXT: and r4, r4, r5
+; CHECK-NEXT: or r3, r4, r3
; CHECK-NEXT: # kill: def $r4 killed $r3
; CHECK-NEXT: clrldi r3, r3, 32
; CHECK-NEXT: blr
@@ -42,21 +42,23 @@ define i32 @ogt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
; CHECK-NEXT: xscmpudp cr7, f2, f4
; CHECK-NEXT: mfocrf r4, 1
; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31
-; CHECK-NEXT: and r4, r3, r4
-; CHECK-NEXT: xscmpudp cr0, f1, f3
-; CHECK-NEXT: mfocrf r3, 128
-; CHECK-NEXT: stw r3, -4(r1)
+; CHECK-NEXT: and r3, r3, r4
; CHECK-NEXT: xscmpudp cr7, f1, f3
-; CHECK-NEXT: mfocrf r3, 1
+; CHECK-NEXT: xscmpudp cr0, f1, f3
+; CHECK-NEXT: mfocrf r4, 1
+; CHECK-NEXT: rotlwi r4, r4, 28
+; CHECK-NEXT: stw r4, -4(r1)
+; CHECK-NEXT: mcrf cr7, cr0
+; CHECK-NEXT: mfocrf r4, 1
+; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31
; CHECK-NEXT: lwz r5, -4(r1)
; CHECK-NEXT: rotlwi r5, r5, 4
; CHECK-NEXT: mtocrf 1, r5
-; CHECK-NEXT: rlwinm r5, r3, 30, 31, 31
-; CHECK-NEXT: mfocrf r3, 1
-; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31
-; CHECK-NEXT: xori r3, r3, 1
-; CHECK-NEXT: and r3, r3, r5
-; CHECK-NEXT: or r3, r3, r4
+; CHECK-NEXT: mfocrf r5, 1
+; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31
+; CHECK-NEXT: xori r5, r5, 1
+; CHECK-NEXT: and r4, r5, r4
+; CHECK-NEXT: or r3, r4, r3
; CHECK-NEXT: # kill: def $r4 killed $r3
; CHECK-NEXT: clrldi r3, r3, 32
; CHECK-NEXT: blr
@@ -72,8 +74,9 @@ define i1 @test_f128(fp128 %a, fp128 %b) #0 {
; CHECK-NEXT: xscmpuqp cr7, v2, v3
; CHECK-NEXT: mfocrf r3, 1
; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31
-; CHECK-NEXT: xori r4, r3, 1
-; CHECK-NEXT: # implicit-def: $x3
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: # implicit-def: $x4
+; CHECK-NEXT: mr r4, r3
; CHECK-NEXT: mr r3, r4
; CHECK-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll b/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll
index 627db54ef09f..27c6e71ba803 100644
--- a/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll
+++ b/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll
@@ -9,8 +9,9 @@ define i1 @Test(double %a) {
; CHECK-NEXT: mffprwz 3, 0
; CHECK-NEXT: xori 3, 3, 65534
; CHECK-NEXT: cntlzw 3, 3
-; CHECK-NEXT: srwi 4, 3, 5
-; CHECK-NEXT: # implicit-def: $x3
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: # implicit-def: $x4
+; CHECK-NEXT: mr 4, 3
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll b/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
index 59311dbb2f5f..8844f621ee8f 100644
--- a/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
+++ b/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
@@ -145,37 +145,19 @@ entry:
}
define <8 x i16> @shuffle_vector_halfword_8_1(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_halfword_8_1:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 6
-; CHECK-OPT-NEXT: vinserth 3, 2, 14
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_halfword_8_1:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 6
-; CHECK-O0-NEXT: vinserth 2, 3, 14
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_8_1:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 12
-; CHECK-BE-OPT-NEXT: vinserth 3, 2, 0
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_halfword_8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 6
+; CHECK-NEXT: vinserth 3, 2, 14
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_halfword_8_1:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 12
-; CHECK-BE-O0-NEXT: vinserth 2, 3, 0
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_halfword_8_1:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 12
+; CHECK-BE-NEXT: vinserth 3, 2, 0
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %vecins
@@ -184,255 +166,131 @@ entry:
; The following testcases take one halfword element from the first vector and
; inserts it at various locations in the second vector
define <8 x i16> @shuffle_vector_halfword_9_7(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_halfword_9_7:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 10
-; CHECK-OPT-NEXT: vinserth 3, 2, 12
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_halfword_9_7:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 10
-; CHECK-O0-NEXT: vinserth 2, 3, 12
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_9_7:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 8
-; CHECK-BE-OPT-NEXT: vinserth 3, 2, 2
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_halfword_9_7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 10
+; CHECK-NEXT: vinserth 3, 2, 12
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_halfword_9_7:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 8
-; CHECK-BE-O0-NEXT: vinserth 2, 3, 2
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_halfword_9_7:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 8
+; CHECK-BE-NEXT: vinserth 3, 2, 2
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 7, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %vecins
}
define <8 x i16> @shuffle_vector_halfword_10_4(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_halfword_10_4:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vinserth 3, 2, 10
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_halfword_10_4:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vinserth 2, 3, 10
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_10_4:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 2
-; CHECK-BE-OPT-NEXT: vinserth 3, 2, 4
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_halfword_10_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinserth 3, 2, 10
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_halfword_10_4:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 2
-; CHECK-BE-O0-NEXT: vinserth 2, 3, 4
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_halfword_10_4:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 2
+; CHECK-BE-NEXT: vinserth 3, 2, 4
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 4, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %vecins
}
define <8 x i16> @shuffle_vector_halfword_11_2(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_halfword_11_2:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 4
-; CHECK-OPT-NEXT: vinserth 3, 2, 8
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_halfword_11_2:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 4
-; CHECK-O0-NEXT: vinserth 2, 3, 8
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_11_2:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 14
-; CHECK-BE-OPT-NEXT: vinserth 3, 2, 6
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_halfword_11_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 4
+; CHECK-NEXT: vinserth 3, 2, 8
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_halfword_11_2:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 14
-; CHECK-BE-O0-NEXT: vinserth 2, 3, 6
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_halfword_11_2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 14
+; CHECK-BE-NEXT: vinserth 3, 2, 6
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 2, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %vecins
}
define <8 x i16> @shuffle_vector_halfword_12_6(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_halfword_12_6:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 12
-; CHECK-OPT-NEXT: vinserth 3, 2, 6
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_halfword_12_6:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 12
-; CHECK-O0-NEXT: vinserth 2, 3, 6
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_12_6:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 6
-; CHECK-BE-OPT-NEXT: vinserth 3, 2, 8
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_halfword_12_6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 12
+; CHECK-NEXT: vinserth 3, 2, 6
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_halfword_12_6:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 6
-; CHECK-BE-O0-NEXT: vinserth 2, 3, 8
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_halfword_12_6:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 6
+; CHECK-BE-NEXT: vinserth 3, 2, 8
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 6, i32 13, i32 14, i32 15>
ret <8 x i16> %vecins
}
define <8 x i16> @shuffle_vector_halfword_13_3(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_halfword_13_3:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 2
-; CHECK-OPT-NEXT: vinserth 3, 2, 4
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_halfword_13_3:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 2
-; CHECK-O0-NEXT: vinserth 2, 3, 4
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_13_3:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vinserth 3, 2, 10
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_halfword_13_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 2
+; CHECK-NEXT: vinserth 3, 2, 4
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_halfword_13_3:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vinserth 2, 3, 10
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_halfword_13_3:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vinserth 3, 2, 10
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 3, i32 14, i32 15>
ret <8 x i16> %vecins
}
define <8 x i16> @shuffle_vector_halfword_14_5(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_halfword_14_5:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 14
-; CHECK-OPT-NEXT: vinserth 3, 2, 2
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_halfword_14_5:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 14
-; CHECK-O0-NEXT: vinserth 2, 3, 2
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_14_5:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 4
-; CHECK-BE-OPT-NEXT: vinserth 3, 2, 12
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_halfword_14_5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 14
+; CHECK-NEXT: vinserth 3, 2, 2
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_halfword_14_5:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 4
-; CHECK-BE-O0-NEXT: vinserth 2, 3, 12
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_halfword_14_5:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 4
+; CHECK-BE-NEXT: vinserth 3, 2, 12
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 5, i32 15>
ret <8 x i16> %vecins
}
define <8 x i16> @shuffle_vector_halfword_15_0(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_halfword_15_0:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 8
-; CHECK-OPT-NEXT: vinserth 3, 2, 0
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_halfword_15_0:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 8
-; CHECK-O0-NEXT: vinserth 2, 3, 0
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_15_0:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 10
-; CHECK-BE-OPT-NEXT: vinserth 3, 2, 14
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_halfword_15_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 8
+; CHECK-NEXT: vinserth 3, 2, 0
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_halfword_15_0:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 10
-; CHECK-BE-O0-NEXT: vinserth 2, 3, 14
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_halfword_15_0:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 10
+; CHECK-BE-NEXT: vinserth 3, 2, 14
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
ret <8 x i16> %vecins
@@ -860,588 +718,302 @@ entry:
; The following testcases take one byte element from the first vector and
; inserts it at various locations in the second vector
define <16 x i8> @shuffle_vector_byte_16_8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_16_8:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vinsertb 3, 2, 15
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_16_8:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vinsertb 2, 3, 15
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_16_8:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 1
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 0
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_16_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsertb 3, 2, 15
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_16_8:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 1
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 0
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_16_8:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 1
+; CHECK-BE-NEXT: vinsertb 3, 2, 0
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_17_1(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_17_1:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 7
-; CHECK-OPT-NEXT: vinsertb 3, 2, 14
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_17_1:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 7
-; CHECK-O0-NEXT: vinsertb 2, 3, 14
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_17_1:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 10
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 1
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_17_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 7
+; CHECK-NEXT: vinsertb 3, 2, 14
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_17_1:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 10
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 1
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_17_1:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 10
+; CHECK-BE-NEXT: vinsertb 3, 2, 1
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_18_10(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_18_10:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 14
-; CHECK-OPT-NEXT: vinsertb 3, 2, 13
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_18_10:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 14
-; CHECK-O0-NEXT: vinsertb 2, 3, 13
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_18_10:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 3
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 2
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_18_10:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 14
+; CHECK-NEXT: vinsertb 3, 2, 13
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_18_10:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 3
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 2
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_18_10:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 3
+; CHECK-BE-NEXT: vinsertb 3, 2, 2
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 10, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_19_3(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_19_3:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 5
-; CHECK-OPT-NEXT: vinsertb 3, 2, 12
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_19_3:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 5
-; CHECK-O0-NEXT: vinsertb 2, 3, 12
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_19_3:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 12
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 3
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_19_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 5
+; CHECK-NEXT: vinsertb 3, 2, 12
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_19_3:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 12
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 3
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_19_3:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 12
+; CHECK-BE-NEXT: vinsertb 3, 2, 3
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_20_12(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_20_12:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 12
-; CHECK-OPT-NEXT: vinsertb 3, 2, 11
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_20_12:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 12
-; CHECK-O0-NEXT: vinsertb 2, 3, 11
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_20_12:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 5
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 4
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_20_12:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 12
+; CHECK-NEXT: vinsertb 3, 2, 11
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_20_12:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 5
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 4
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_20_12:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 5
+; CHECK-BE-NEXT: vinsertb 3, 2, 4
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 12, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_21_5(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_21_5:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 3
-; CHECK-OPT-NEXT: vinsertb 3, 2, 10
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_21_5:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 3
-; CHECK-O0-NEXT: vinsertb 2, 3, 10
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_21_5:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 14
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 5
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_21_5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 3
+; CHECK-NEXT: vinsertb 3, 2, 10
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_21_5:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 14
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 5
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_21_5:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 14
+; CHECK-BE-NEXT: vinsertb 3, 2, 5
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 5, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_22_14(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_22_14:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 10
-; CHECK-OPT-NEXT: vinsertb 3, 2, 9
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_22_14:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 10
-; CHECK-O0-NEXT: vinsertb 2, 3, 9
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_22_14:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 7
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 6
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_22_14:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 10
+; CHECK-NEXT: vinsertb 3, 2, 9
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_22_14:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 7
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 6
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_22_14:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 7
+; CHECK-BE-NEXT: vinsertb 3, 2, 6
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 14, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_23_7(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_23_7:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 1
-; CHECK-OPT-NEXT: vinsertb 3, 2, 8
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_23_7:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 1
-; CHECK-O0-NEXT: vinsertb 2, 3, 8
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_23_7:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 7
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_23_7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 1
+; CHECK-NEXT: vinsertb 3, 2, 8
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_23_7:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 7
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_23_7:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vinsertb 3, 2, 7
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_24_0(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_24_0:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 8
-; CHECK-OPT-NEXT: vinsertb 3, 2, 7
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_24_0:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 8
-; CHECK-O0-NEXT: vinsertb 2, 3, 7
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_24_0:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 9
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 8
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_24_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 8
+; CHECK-NEXT: vinsertb 3, 2, 7
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_24_0:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 9
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 8
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_24_0:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 9
+; CHECK-BE-NEXT: vinsertb 3, 2, 8
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_25_9(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_25_9:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 15
-; CHECK-OPT-NEXT: vinsertb 3, 2, 6
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_25_9:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 15
-; CHECK-O0-NEXT: vinsertb 2, 3, 6
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_25_9:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 2
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 9
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_25_9:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 15
+; CHECK-NEXT: vinsertb 3, 2, 6
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_25_9:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 2
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 9
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_25_9:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 2
+; CHECK-BE-NEXT: vinsertb 3, 2, 9
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 9, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_26_2(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_26_2:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 6
-; CHECK-OPT-NEXT: vinsertb 3, 2, 5
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_26_2:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 6
-; CHECK-O0-NEXT: vinsertb 2, 3, 5
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_26_2:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 11
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 10
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_26_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 6
+; CHECK-NEXT: vinsertb 3, 2, 5
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_26_2:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 11
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 10
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_26_2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 11
+; CHECK-BE-NEXT: vinsertb 3, 2, 10
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 2, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_27_11(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_27_11:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 13
-; CHECK-OPT-NEXT: vinsertb 3, 2, 4
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_27_11:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 13
-; CHECK-O0-NEXT: vinsertb 2, 3, 4
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_27_11:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 4
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 11
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_27_11:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 13
+; CHECK-NEXT: vinsertb 3, 2, 4
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_27_11:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 4
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 11
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_27_11:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 4
+; CHECK-BE-NEXT: vinsertb 3, 2, 11
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_28_4(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_28_4:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 4
-; CHECK-OPT-NEXT: vinsertb 3, 2, 3
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_28_4:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 4
-; CHECK-O0-NEXT: vinsertb 2, 3, 3
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_28_4:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 13
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 12
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_28_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 4
+; CHECK-NEXT: vinsertb 3, 2, 3
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_28_4:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 13
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 12
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_28_4:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 13
+; CHECK-BE-NEXT: vinsertb 3, 2, 12
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 4, i32 29, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_29_13(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_29_13:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 11
-; CHECK-OPT-NEXT: vinsertb 3, 2, 2
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_29_13:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 11
-; CHECK-O0-NEXT: vinsertb 2, 3, 2
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_29_13:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 6
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 13
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_29_13:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 11
+; CHECK-NEXT: vinsertb 3, 2, 2
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_29_13:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 6
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 13
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_29_13:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 6
+; CHECK-BE-NEXT: vinsertb 3, 2, 13
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 13, i32 30, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_30_6(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_30_6:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 2
-; CHECK-OPT-NEXT: vinsertb 3, 2, 1
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_30_6:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 2
-; CHECK-O0-NEXT: vinsertb 2, 3, 1
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_30_6:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 15
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 14
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_30_6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 2
+; CHECK-NEXT: vinsertb 3, 2, 1
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_30_6:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 15
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 14
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_30_6:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 15
+; CHECK-BE-NEXT: vinsertb 3, 2, 14
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 6, i32 31>
ret <16 x i8> %vecins
}
define <16 x i8> @shuffle_vector_byte_31_15(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-OPT-LABEL: shuffle_vector_byte_31_15:
-; CHECK-OPT: # %bb.0: # %entry
-; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 9
-; CHECK-OPT-NEXT: vinsertb 3, 2, 0
-; CHECK-OPT-NEXT: vmr 2, 3
-; CHECK-OPT-NEXT: blr
-;
-; CHECK-O0-LABEL: shuffle_vector_byte_31_15:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-O0-NEXT: vmr 3, 2
-; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-O0-NEXT: vsldoi 3, 3, 3, 9
-; CHECK-O0-NEXT: vinsertb 2, 3, 0
-; CHECK-O0-NEXT: blr
-;
-; CHECK-BE-OPT-LABEL: shuffle_vector_byte_31_15:
-; CHECK-BE-OPT: # %bb.0: # %entry
-; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 8
-; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 15
-; CHECK-BE-OPT-NEXT: vmr 2, 3
-; CHECK-BE-OPT-NEXT: blr
+; CHECK-LABEL: shuffle_vector_byte_31_15:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsldoi 2, 2, 2, 9
+; CHECK-NEXT: vinsertb 3, 2, 0
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
;
-; CHECK-BE-O0-LABEL: shuffle_vector_byte_31_15:
-; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill
-; CHECK-BE-O0-NEXT: vmr 3, 2
-; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload
-; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 8
-; CHECK-BE-O0-NEXT: vinsertb 2, 3, 15
-; CHECK-BE-O0-NEXT: blr
+; CHECK-BE-LABEL: shuffle_vector_byte_31_15:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vsldoi 2, 2, 2, 8
+; CHECK-BE-NEXT: vinsertb 3, 2, 15
+; CHECK-BE-NEXT: vmr 2, 3
+; CHECK-BE-NEXT: blr
entry:
%vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 15>
ret <16 x i8> %vecins
@@ -1749,8 +1321,8 @@ define <8 x i16> @insert_halfword_0(<8 x i16> %a, i16 %b) {
;
; CHECK-O0-LABEL: insert_halfword_0:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinserth 2, 3, 14
; CHECK-O0-NEXT: blr
@@ -1763,8 +1335,8 @@ define <8 x i16> @insert_halfword_0(<8 x i16> %a, i16 %b) {
;
; CHECK-BE-O0-LABEL: insert_halfword_0:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinserth 2, 3, 0
; CHECK-BE-O0-NEXT: blr
@@ -1782,8 +1354,8 @@ define <8 x i16> @insert_halfword_1(<8 x i16> %a, i16 %b) {
;
; CHECK-O0-LABEL: insert_halfword_1:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinserth 2, 3, 12
; CHECK-O0-NEXT: blr
@@ -1796,8 +1368,8 @@ define <8 x i16> @insert_halfword_1(<8 x i16> %a, i16 %b) {
;
; CHECK-BE-O0-LABEL: insert_halfword_1:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinserth 2, 3, 2
; CHECK-BE-O0-NEXT: blr
@@ -1815,8 +1387,8 @@ define <8 x i16> @insert_halfword_2(<8 x i16> %a, i16 %b) {
;
; CHECK-O0-LABEL: insert_halfword_2:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinserth 2, 3, 10
; CHECK-O0-NEXT: blr
@@ -1829,8 +1401,8 @@ define <8 x i16> @insert_halfword_2(<8 x i16> %a, i16 %b) {
;
; CHECK-BE-O0-LABEL: insert_halfword_2:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinserth 2, 3, 4
; CHECK-BE-O0-NEXT: blr
@@ -1848,8 +1420,8 @@ define <8 x i16> @insert_halfword_3(<8 x i16> %a, i16 %b) {
;
; CHECK-O0-LABEL: insert_halfword_3:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinserth 2, 3, 8
; CHECK-O0-NEXT: blr
@@ -1862,8 +1434,8 @@ define <8 x i16> @insert_halfword_3(<8 x i16> %a, i16 %b) {
;
; CHECK-BE-O0-LABEL: insert_halfword_3:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinserth 2, 3, 6
; CHECK-BE-O0-NEXT: blr
@@ -1881,8 +1453,8 @@ define <8 x i16> @insert_halfword_4(<8 x i16> %a, i16 %b) {
;
; CHECK-O0-LABEL: insert_halfword_4:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinserth 2, 3, 6
; CHECK-O0-NEXT: blr
@@ -1895,8 +1467,8 @@ define <8 x i16> @insert_halfword_4(<8 x i16> %a, i16 %b) {
;
; CHECK-BE-O0-LABEL: insert_halfword_4:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinserth 2, 3, 8
; CHECK-BE-O0-NEXT: blr
@@ -1914,8 +1486,8 @@ define <8 x i16> @insert_halfword_5(<8 x i16> %a, i16 %b) {
;
; CHECK-O0-LABEL: insert_halfword_5:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinserth 2, 3, 4
; CHECK-O0-NEXT: blr
@@ -1928,8 +1500,8 @@ define <8 x i16> @insert_halfword_5(<8 x i16> %a, i16 %b) {
;
; CHECK-BE-O0-LABEL: insert_halfword_5:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinserth 2, 3, 10
; CHECK-BE-O0-NEXT: blr
@@ -1947,8 +1519,8 @@ define <8 x i16> @insert_halfword_6(<8 x i16> %a, i16 %b) {
;
; CHECK-O0-LABEL: insert_halfword_6:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinserth 2, 3, 2
; CHECK-O0-NEXT: blr
@@ -1961,8 +1533,8 @@ define <8 x i16> @insert_halfword_6(<8 x i16> %a, i16 %b) {
;
; CHECK-BE-O0-LABEL: insert_halfword_6:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinserth 2, 3, 12
; CHECK-BE-O0-NEXT: blr
@@ -1980,8 +1552,8 @@ define <8 x i16> @insert_halfword_7(<8 x i16> %a, i16 %b) {
;
; CHECK-O0-LABEL: insert_halfword_7:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinserth 2, 3, 0
; CHECK-O0-NEXT: blr
@@ -1994,8 +1566,8 @@ define <8 x i16> @insert_halfword_7(<8 x i16> %a, i16 %b) {
;
; CHECK-BE-O0-LABEL: insert_halfword_7:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinserth 2, 3, 14
; CHECK-BE-O0-NEXT: blr
@@ -2015,8 +1587,8 @@ define <16 x i8> @insert_byte_0(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_0:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 15
; CHECK-O0-NEXT: blr
@@ -2029,8 +1601,8 @@ define <16 x i8> @insert_byte_0(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_0:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 0
; CHECK-BE-O0-NEXT: blr
@@ -2048,8 +1620,8 @@ define <16 x i8> @insert_byte_1(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_1:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 14
; CHECK-O0-NEXT: blr
@@ -2062,8 +1634,8 @@ define <16 x i8> @insert_byte_1(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_1:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 1
; CHECK-BE-O0-NEXT: blr
@@ -2081,8 +1653,8 @@ define <16 x i8> @insert_byte_2(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_2:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 13
; CHECK-O0-NEXT: blr
@@ -2095,8 +1667,8 @@ define <16 x i8> @insert_byte_2(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_2:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 2
; CHECK-BE-O0-NEXT: blr
@@ -2114,8 +1686,8 @@ define <16 x i8> @insert_byte_3(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_3:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 12
; CHECK-O0-NEXT: blr
@@ -2128,8 +1700,8 @@ define <16 x i8> @insert_byte_3(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_3:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 3
; CHECK-BE-O0-NEXT: blr
@@ -2147,8 +1719,8 @@ define <16 x i8> @insert_byte_4(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_4:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 11
; CHECK-O0-NEXT: blr
@@ -2161,8 +1733,8 @@ define <16 x i8> @insert_byte_4(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_4:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 4
; CHECK-BE-O0-NEXT: blr
@@ -2180,8 +1752,8 @@ define <16 x i8> @insert_byte_5(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_5:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 10
; CHECK-O0-NEXT: blr
@@ -2194,8 +1766,8 @@ define <16 x i8> @insert_byte_5(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_5:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 5
; CHECK-BE-O0-NEXT: blr
@@ -2213,8 +1785,8 @@ define <16 x i8> @insert_byte_6(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_6:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 9
; CHECK-O0-NEXT: blr
@@ -2227,8 +1799,8 @@ define <16 x i8> @insert_byte_6(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_6:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 6
; CHECK-BE-O0-NEXT: blr
@@ -2246,8 +1818,8 @@ define <16 x i8> @insert_byte_7(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_7:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 8
; CHECK-O0-NEXT: blr
@@ -2260,8 +1832,8 @@ define <16 x i8> @insert_byte_7(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_7:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 7
; CHECK-BE-O0-NEXT: blr
@@ -2279,8 +1851,8 @@ define <16 x i8> @insert_byte_8(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_8:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 7
; CHECK-O0-NEXT: blr
@@ -2293,8 +1865,8 @@ define <16 x i8> @insert_byte_8(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_8:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 8
; CHECK-BE-O0-NEXT: blr
@@ -2312,8 +1884,8 @@ define <16 x i8> @insert_byte_9(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_9:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 6
; CHECK-O0-NEXT: blr
@@ -2326,8 +1898,8 @@ define <16 x i8> @insert_byte_9(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_9:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 9
; CHECK-BE-O0-NEXT: blr
@@ -2345,8 +1917,8 @@ define <16 x i8> @insert_byte_10(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_10:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 5
; CHECK-O0-NEXT: blr
@@ -2359,8 +1931,8 @@ define <16 x i8> @insert_byte_10(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_10:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 10
; CHECK-BE-O0-NEXT: blr
@@ -2378,8 +1950,8 @@ define <16 x i8> @insert_byte_11(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_11:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 4
; CHECK-O0-NEXT: blr
@@ -2392,8 +1964,8 @@ define <16 x i8> @insert_byte_11(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_11:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 11
; CHECK-BE-O0-NEXT: blr
@@ -2411,8 +1983,8 @@ define <16 x i8> @insert_byte_12(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_12:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 3
; CHECK-O0-NEXT: blr
@@ -2425,8 +1997,8 @@ define <16 x i8> @insert_byte_12(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_12:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 12
; CHECK-BE-O0-NEXT: blr
@@ -2444,8 +2016,8 @@ define <16 x i8> @insert_byte_13(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_13:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 2
; CHECK-O0-NEXT: blr
@@ -2458,8 +2030,8 @@ define <16 x i8> @insert_byte_13(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_13:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 13
; CHECK-BE-O0-NEXT: blr
@@ -2477,8 +2049,8 @@ define <16 x i8> @insert_byte_14(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_14:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 1
; CHECK-O0-NEXT: blr
@@ -2491,8 +2063,8 @@ define <16 x i8> @insert_byte_14(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_14:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 14
; CHECK-BE-O0-NEXT: blr
@@ -2510,8 +2082,8 @@ define <16 x i8> @insert_byte_15(<16 x i8> %a, i8 %b) {
;
; CHECK-O0-LABEL: insert_byte_15:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: mr 3, 5
-; CHECK-O0-NEXT: mtfprwz 0, 3
+; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-O0-NEXT: mtfprwz 0, 5
; CHECK-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-O0-NEXT: vinsertb 2, 3, 0
; CHECK-O0-NEXT: blr
@@ -2524,8 +2096,8 @@ define <16 x i8> @insert_byte_15(<16 x i8> %a, i8 %b) {
;
; CHECK-BE-O0-LABEL: insert_byte_15:
; CHECK-BE-O0: # %bb.0: # %entry
-; CHECK-BE-O0-NEXT: mr 3, 5
-; CHECK-BE-O0-NEXT: mtfprwz 0, 3
+; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5
+; CHECK-BE-O0-NEXT: mtfprwz 0, 5
; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0
; CHECK-BE-O0-NEXT: vinsertb 2, 3, 15
; CHECK-BE-O0-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/popcount.ll b/llvm/test/CodeGen/PowerPC/popcount.ll
index 107ae5484b5b..fb20f1d3ee43 100644
--- a/llvm/test/CodeGen/PowerPC/popcount.ll
+++ b/llvm/test/CodeGen/PowerPC/popcount.ll
@@ -5,12 +5,11 @@
define i8 @popcount128(i128* nocapture nonnull readonly %0) {
; CHECK-LABEL: popcount128:
; CHECK: # %bb.0: # %Entry
-; CHECK-NEXT: mr 4, 3
-; CHECK-NEXT: ld 3, 0(4)
-; CHECK-NEXT: ld 4, 8(4)
-; CHECK-NEXT: popcntd 4, 4
+; CHECK-NEXT: ld 4, 0(3)
+; CHECK-NEXT: ld 3, 8(3)
; CHECK-NEXT: popcntd 3, 3
-; CHECK-NEXT: add 3, 3, 4
+; CHECK-NEXT: popcntd 4, 4
+; CHECK-NEXT: add 3, 4, 3
; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3
; CHECK-NEXT: clrldi 3, 3, 56
; CHECK-NEXT: blr
@@ -28,18 +27,17 @@ declare i128 @llvm.ctpop.i128(i128)
define i16 @popcount256(i256* nocapture nonnull readonly %0) {
; CHECK-LABEL: popcount256:
; CHECK: # %bb.0: # %Entry
-; CHECK-NEXT: mr 6, 3
-; CHECK-NEXT: ld 3, 0(6)
-; CHECK-NEXT: ld 5, 8(6)
-; CHECK-NEXT: ld 4, 16(6)
-; CHECK-NEXT: ld 6, 24(6)
+; CHECK-NEXT: ld 4, 0(3)
+; CHECK-NEXT: ld 5, 8(3)
+; CHECK-NEXT: ld 6, 16(3)
+; CHECK-NEXT: ld 3, 24(3)
+; CHECK-NEXT: popcntd 3, 3
; CHECK-NEXT: popcntd 6, 6
-; CHECK-NEXT: popcntd 4, 4
-; CHECK-NEXT: add 4, 4, 6
+; CHECK-NEXT: add 3, 6, 3
; CHECK-NEXT: popcntd 5, 5
-; CHECK-NEXT: popcntd 3, 3
-; CHECK-NEXT: add 3, 3, 5
-; CHECK-NEXT: add 3, 3, 4
+; CHECK-NEXT: popcntd 4, 4
+; CHECK-NEXT: add 4, 4, 5
+; CHECK-NEXT: add 3, 4, 3
; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3
; CHECK-NEXT: clrldi 3, 3, 48
; CHECK-NEXT: blr
@@ -59,18 +57,18 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
; CHECK-NEXT: xxlor 0, 34, 34
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0
; CHECK-NEXT: mffprd 3, 0
-; CHECK-NEXT: popcntd 4, 3
+; CHECK-NEXT: popcntd 3, 3
; CHECK-NEXT: xxswapd 0, 34
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0
-; CHECK-NEXT: mffprd 3, 0
-; CHECK-NEXT: popcntd 3, 3
-; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: mtfprd 0, 3
-; CHECK-NEXT: fmr 1, 0
-; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: mffprd 4, 0
+; CHECK-NEXT: popcntd 4, 4
+; CHECK-NEXT: add 3, 4, 3
; CHECK-NEXT: mtfprd 0, 3
; CHECK-NEXT: # kill: def $vsl0 killed $f0
-; CHECK-NEXT: xxmrghd 34, 0, 1
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: mtfprd 1, 3
+; CHECK-NEXT: # kill: def $vsl1 killed $f1
+; CHECK-NEXT: xxmrghd 34, 1, 0
; CHECK-NEXT: blr
Entry:
%1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0)
diff --git a/llvm/test/CodeGen/PowerPC/spill-nor0.ll b/llvm/test/CodeGen/PowerPC/spill-nor0.ll
index c9c665144829..4eeb34d0f899 100644
--- a/llvm/test/CodeGen/PowerPC/spill-nor0.ll
+++ b/llvm/test/CodeGen/PowerPC/spill-nor0.ll
@@ -12,12 +12,6 @@ if.then: ; preds = %entry
if.end: ; preds = %entry
%0 = call i64 asm sideeffect "mr 3,$1\0A\09mr 4,$2\0A\09rotldi 0,0,3 ; rotldi 0,0,13\0A\09rotldi 0,0,61 ; rotldi 0,0,51\0A\09or 1,1,1\0A\09mr $0,3", "=b,b,b,~{cc},~{memory},~{r3},~{r4}"(i32 0, i64* undef) #0
- br i1 undef, label %end0, label %end1 ; need successor blocks to force spill
-
-end0:
- unreachable
-
-end1:
unreachable
; CHECK-LABEL: @_ZN4llvm3sys17RunningOnValgrindEv
diff --git a/llvm/test/CodeGen/PowerPC/spill-nor0.mir b/llvm/test/CodeGen/PowerPC/spill-nor0.mir
deleted file mode 100644
index 2f50ff3701d1..000000000000
--- a/llvm/test/CodeGen/PowerPC/spill-nor0.mir
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llc -o - %s -mtriple=powerpc64-- -run-pass=regallocfast | FileCheck %s
----
-# CHECK-LABEL: name: func
-name: func
-tracksRegLiveness: true
-body: |
- bb.0:
- %0 : gprc = LI 42
- %1 : gprc_nor0 = COPY %0
- ; CHECK: STW
-
- ; Clobber all regs to force a spill
- NOP csr_noregs
-
- ; CHECK: LWZ
- NOP implicit %1
-...
diff --git a/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll b/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll
index 7c6b9eaa6a06..fc939e170ffb 100644
--- a/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll
@@ -6,12 +6,11 @@
; CHECK: mflr 0
; CHECK-NEXT: stw 0, 4(1)
; CHECK-NEXT: lis 0, -2
-; CHECK-NEXT: ori 0, 0, 65504
+; CHECK-NEXT: ori 0, 0, 65488
; CHECK-NEXT: stwux 1, 1, 0
; CHECK-NEXT: sub 0, 1, 0
; CHECK-NEXT: lis 4, __stack_chk_guard at ha
-; CHECK-NEXT: stw 4, 16(1)
-; CHECK-NEXT: lwz 4, __stack_chk_guard at l(4)
-; CHECK-NEXT: lis 5, 1
-; CHECK-NEXT: ori 5, 5, 28
-; CHECK-NEXT: stwx 4, 1, 5
+; CHECK-NEXT: lwz 5, __stack_chk_guard at l(4)
+; CHECK-NEXT: lis 6, 1
+; CHECK-NEXT: ori 6, 6, 44
+; CHECK-NEXT: stwx 5, 1, 6
diff --git a/llvm/test/CodeGen/PowerPC/vsx-args.ll b/llvm/test/CodeGen/PowerPC/vsx-args.ll
index 8cd2dbfde279..3e387d8da7d4 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-args.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-args.ll
@@ -24,14 +24,11 @@ entry:
; CHECK: blr
; CHECK-FISL-LABEL: @main
-; CHECK-FISL: stxvd2x 36, 1, 3
-; CHECK-FISL: vmr 4, 3
-; CHECK-FISL: lxvd2x 35, 1, 3
-; CHECK-FISL: 3, 144
-; CHCEK-FISL: stxvd2x 36, 1, 3
-; CHECK-FISL: vmr 4, 2
+; CHECK-FISL: stxvd2x 34
+; CHECK-FISL: vmr 2, 3
+; CHECK-FISL: vmr 3, 4
+; CHECK-FISL: lxvd2x 36
; CHECK-FISL: bl sv
-
; CHECK-FISL: lxvd2x [[VC:[0-9]+]],
; CHECK-FISL: xvadddp 34, 34, [[VC]]
; CHECK-FISL: blr
@@ -39,3 +36,4 @@ entry:
attributes #0 = { noinline nounwind readnone }
attributes #1 = { nounwind }
+
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 6349523bc395..4a78218262ca 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -164,7 +164,8 @@ define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-FISL-LABEL: test6:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xxlxor v2, v2, v3
+; CHECK-FISL-NEXT: xxlxor vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test6:
@@ -192,7 +193,8 @@ define <16 x i8> @test7(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-FISL-LABEL: test7:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xxlxor v2, v2, v3
+; CHECK-FISL-NEXT: xxlxor vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test7:
@@ -248,7 +250,8 @@ define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-FISL-LABEL: test9:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xxlor v2, v2, v3
+; CHECK-FISL-NEXT: xxlor vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test9:
@@ -276,7 +279,8 @@ define <16 x i8> @test10(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-FISL-LABEL: test10:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xxlor v2, v2, v3
+; CHECK-FISL-NEXT: xxlor vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test10:
@@ -332,7 +336,8 @@ define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-FISL-LABEL: test12:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xxland v2, v2, v3
+; CHECK-FISL-NEXT: xxland vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test12:
@@ -360,7 +365,8 @@ define <16 x i8> @test13(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-FISL-LABEL: test13:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xxland v2, v2, v3
+; CHECK-FISL-NEXT: xxland vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test13:
@@ -418,8 +424,10 @@ define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-FISL-LABEL: test15:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xxlor v4, v2, v3
-; CHECK-FISL-NEXT: xxlnor v2, v2, v3
+; CHECK-FISL-NEXT: xxlor vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v4, vs0, vs0
+; CHECK-FISL-NEXT: xxlnor vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test15:
@@ -448,8 +456,10 @@ define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-FISL-LABEL: test16:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xxlor v4, v2, v3
-; CHECK-FISL-NEXT: xxlnor v2, v2, v3
+; CHECK-FISL-NEXT: xxlor vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v4, vs0, vs0
+; CHECK-FISL-NEXT: xxlnor vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test16:
@@ -508,8 +518,10 @@ define <8 x i16> @test18(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-FISL-LABEL: test18:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xxlnor v4, v3, v3
-; CHECK-FISL-NEXT: xxlandc v2, v2, v3
+; CHECK-FISL-NEXT: xxlnor vs0, v3, v3
+; CHECK-FISL-NEXT: xxlor v4, vs0, vs0
+; CHECK-FISL-NEXT: xxlandc vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test18:
@@ -538,8 +550,10 @@ define <16 x i8> @test19(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-FISL-LABEL: test19:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xxlnor v4, v3, v3
-; CHECK-FISL-NEXT: xxlandc v2, v2, v3
+; CHECK-FISL-NEXT: xxlnor vs0, v3, v3
+; CHECK-FISL-NEXT: xxlor v4, vs0, vs0
+; CHECK-FISL-NEXT: xxlandc vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test19:
@@ -650,10 +664,10 @@ define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xvcmpeqsp vs0, v4, v5
; CHECK-FISL-NEXT: xvcmpeqsp vs1, v5, v5
-; CHECK-FISL-NEXT: xxlnor vs2, vs1, vs1
-; CHECK-FISL-NEXT: xvcmpeqsp vs1, v4, v4
; CHECK-FISL-NEXT: xxlnor vs1, vs1, vs1
-; CHECK-FISL-NEXT: xxlor vs1, vs1, vs2
+; CHECK-FISL-NEXT: xvcmpeqsp vs2, v4, v4
+; CHECK-FISL-NEXT: xxlnor vs2, vs2, vs2
+; CHECK-FISL-NEXT: xxlor vs1, vs2, vs1
; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1
; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0
; CHECK-FISL-NEXT: blr
@@ -694,8 +708,8 @@ define <8 x i16> @test23(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d)
; CHECK-FISL-LABEL: test23:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: vcmpequh v4, v4, v5
-; CHECK-FISL-NEXT: xxlor vs0, v4, v4
-; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-FISL-NEXT: xxsel vs0, v3, v2, v4
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test23:
@@ -728,8 +742,8 @@ define <16 x i8> @test24(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d)
; CHECK-FISL-LABEL: test24:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: vcmpequb v4, v4, v5
-; CHECK-FISL-NEXT: xxlor vs0, v4, v4
-; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-FISL-NEXT: xxsel vs0, v3, v2, v4
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test24:
@@ -821,16 +835,17 @@ define <2 x i64> @test26(<2 x i64> %a, <2 x i64> %b) {
; CHECK-FISL-NEXT: stxvd2x v3, 0, r3
; CHECK-FISL-NEXT: addi r3, r1, -48
; CHECK-FISL-NEXT: stxvd2x v2, 0, r3
-; CHECK-FISL-NEXT: ld r4, -24(r1)
-; CHECK-FISL-NEXT: ld r3, -40(r1)
-; CHECK-FISL-NEXT: add r3, r3, r4
+; CHECK-FISL-NEXT: ld r3, -24(r1)
+; CHECK-FISL-NEXT: ld r4, -40(r1)
+; CHECK-FISL-NEXT: add r3, r4, r3
; CHECK-FISL-NEXT: std r3, -8(r1)
-; CHECK-FISL-NEXT: ld r4, -32(r1)
-; CHECK-FISL-NEXT: ld r3, -48(r1)
-; CHECK-FISL-NEXT: add r3, r3, r4
+; CHECK-FISL-NEXT: ld r3, -32(r1)
+; CHECK-FISL-NEXT: ld r4, -48(r1)
+; CHECK-FISL-NEXT: add r3, r4, r3
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x v2, 0, r3
+; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test26:
@@ -860,7 +875,8 @@ define <2 x i64> @test27(<2 x i64> %a, <2 x i64> %b) {
;
; CHECK-FISL-LABEL: test27:
; CHECK-FISL: # %bb.0:
-; CHECK-FISL-NEXT: xxland v2, v2, v3
+; CHECK-FISL-NEXT: xxland vs0, v2, v3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test27:
@@ -994,7 +1010,8 @@ define <2 x i64> @test30(<2 x i64>* %a) {
;
; CHECK-FISL-LABEL: test30:
; CHECK-FISL: # %bb.0:
-; CHECK-FISL-NEXT: lxvd2x v2, 0, r3
+; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test30:
@@ -1112,10 +1129,10 @@ define <4 x float> @test32u(<4 x float>* %a) {
; CHECK-FISL-LABEL: test32u:
; CHECK-FISL: # %bb.0:
; CHECK-FISL-NEXT: li r4, 15
-; CHECK-FISL-NEXT: lvx v3, r3, r4
-; CHECK-FISL-NEXT: lvsl v4, 0, r3
-; CHECK-FISL-NEXT: lvx v2, 0, r3
-; CHECK-FISL-NEXT: vperm v2, v2, v3, v4
+; CHECK-FISL-NEXT: lvx v2, r3, r4
+; CHECK-FISL-NEXT: lvsl v3, 0, r3
+; CHECK-FISL-NEXT: lvx v4, 0, r3
+; CHECK-FISL-NEXT: vperm v2, v4, v2, v3
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test32u:
@@ -1373,10 +1390,10 @@ define <2 x float> @test44(<2 x i64> %a) {
; CHECK-FISL-NEXT: fcfidus f0, f0
; CHECK-FISL-NEXT: stfs f0, -64(r1)
; CHECK-FISL-NEXT: addi r3, r1, -48
-; CHECK-FISL-NEXT: lxvw4x v3, 0, r3
-; CHECK-FISL-NEXT: addi r3, r1, -64
; CHECK-FISL-NEXT: lxvw4x v2, 0, r3
-; CHECK-FISL-NEXT: vmrghw v2, v2, v3
+; CHECK-FISL-NEXT: addi r3, r1, -64
+; CHECK-FISL-NEXT: lxvw4x v3, 0, r3
+; CHECK-FISL-NEXT: vmrghw v2, v3, v2
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test44:
@@ -1455,10 +1472,10 @@ define <2 x float> @test45(<2 x i64> %a) {
; CHECK-FISL-NEXT: fcfids f0, f0
; CHECK-FISL-NEXT: stfs f0, -64(r1)
; CHECK-FISL-NEXT: addi r3, r1, -48
-; CHECK-FISL-NEXT: lxvw4x v3, 0, r3
-; CHECK-FISL-NEXT: addi r3, r1, -64
; CHECK-FISL-NEXT: lxvw4x v2, 0, r3
-; CHECK-FISL-NEXT: vmrghw v2, v2, v3
+; CHECK-FISL-NEXT: addi r3, r1, -64
+; CHECK-FISL-NEXT: lxvw4x v3, 0, r3
+; CHECK-FISL-NEXT: vmrghw v2, v3, v2
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test45:
@@ -1531,7 +1548,8 @@ define <2 x i64> @test46(<2 x float> %a) {
; CHECK-FISL-NEXT: ld r3, -24(r1)
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x v2, 0, r3
+; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test46:
@@ -1598,7 +1616,8 @@ define <2 x i64> @test47(<2 x float> %a) {
; CHECK-FISL-NEXT: ld r3, -24(r1)
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x v2, 0, r3
+; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test47:
@@ -1840,16 +1859,17 @@ define <2 x i64> @test60(<2 x i64> %a, <2 x i64> %b) {
; CHECK-FISL-NEXT: stxvd2x v3, 0, r3
; CHECK-FISL-NEXT: addi r3, r1, -48
; CHECK-FISL-NEXT: stxvd2x v2, 0, r3
-; CHECK-FISL-NEXT: lwz r4, -20(r1)
-; CHECK-FISL-NEXT: ld r3, -40(r1)
-; CHECK-FISL-NEXT: sld r3, r3, r4
+; CHECK-FISL-NEXT: lwz r3, -20(r1)
+; CHECK-FISL-NEXT: ld r4, -40(r1)
+; CHECK-FISL-NEXT: sld r3, r4, r3
; CHECK-FISL-NEXT: std r3, -8(r1)
-; CHECK-FISL-NEXT: lwz r4, -28(r1)
-; CHECK-FISL-NEXT: ld r3, -48(r1)
-; CHECK-FISL-NEXT: sld r3, r3, r4
+; CHECK-FISL-NEXT: lwz r3, -28(r1)
+; CHECK-FISL-NEXT: ld r4, -48(r1)
+; CHECK-FISL-NEXT: sld r3, r4, r3
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x v2, 0, r3
+; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test60:
@@ -1905,16 +1925,17 @@ define <2 x i64> @test61(<2 x i64> %a, <2 x i64> %b) {
; CHECK-FISL-NEXT: stxvd2x v3, 0, r3
; CHECK-FISL-NEXT: addi r3, r1, -48
; CHECK-FISL-NEXT: stxvd2x v2, 0, r3
-; CHECK-FISL-NEXT: lwz r4, -20(r1)
-; CHECK-FISL-NEXT: ld r3, -40(r1)
-; CHECK-FISL-NEXT: srd r3, r3, r4
+; CHECK-FISL-NEXT: lwz r3, -20(r1)
+; CHECK-FISL-NEXT: ld r4, -40(r1)
+; CHECK-FISL-NEXT: srd r3, r4, r3
; CHECK-FISL-NEXT: std r3, -8(r1)
-; CHECK-FISL-NEXT: lwz r4, -28(r1)
-; CHECK-FISL-NEXT: ld r3, -48(r1)
-; CHECK-FISL-NEXT: srd r3, r3, r4
+; CHECK-FISL-NEXT: lwz r3, -28(r1)
+; CHECK-FISL-NEXT: ld r4, -48(r1)
+; CHECK-FISL-NEXT: srd r3, r4, r3
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x v2, 0, r3
+; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test61:
@@ -1970,16 +1991,17 @@ define <2 x i64> @test62(<2 x i64> %a, <2 x i64> %b) {
; CHECK-FISL-NEXT: stxvd2x v3, 0, r3
; CHECK-FISL-NEXT: addi r3, r1, -48
; CHECK-FISL-NEXT: stxvd2x v2, 0, r3
-; CHECK-FISL-NEXT: lwz r4, -20(r1)
-; CHECK-FISL-NEXT: ld r3, -40(r1)
-; CHECK-FISL-NEXT: srad r3, r3, r4
+; CHECK-FISL-NEXT: lwz r3, -20(r1)
+; CHECK-FISL-NEXT: ld r4, -40(r1)
+; CHECK-FISL-NEXT: srad r3, r4, r3
; CHECK-FISL-NEXT: std r3, -8(r1)
-; CHECK-FISL-NEXT: lwz r4, -28(r1)
-; CHECK-FISL-NEXT: ld r3, -48(r1)
-; CHECK-FISL-NEXT: srad r3, r3, r4
+; CHECK-FISL-NEXT: lwz r3, -28(r1)
+; CHECK-FISL-NEXT: ld r4, -48(r1)
+; CHECK-FISL-NEXT: srad r3, r4, r3
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x v2, 0, r3
+; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test62:
@@ -2005,6 +2027,7 @@ define double @test63(<2 x double> %a) {
;
; CHECK-FISL-LABEL: test63:
; CHECK-FISL: # %bb.0:
+; CHECK-FISL-NEXT: # kill: def $vf2 killed $vf2 killed $v2
; CHECK-FISL-NEXT: xxlor f1, v2, v2
; CHECK-FISL-NEXT: blr
;
@@ -2036,6 +2059,7 @@ define double @test64(<2 x double> %a) {
; CHECK-FISL-LABEL: test64:
; CHECK-FISL: # %bb.0:
; CHECK-FISL-NEXT: xxswapd vs0, v2
+; CHECK-FISL-NEXT: # kill: def $f0 killed $f0 killed $vsl0
; CHECK-FISL-NEXT: fmr f1, f0
; CHECK-FISL-NEXT: blr
;
@@ -2093,7 +2117,8 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) {
; CHECK-FISL-LABEL: test66:
; CHECK-FISL: # %bb.0:
; CHECK-FISL-NEXT: vcmpequw v2, v2, v3
-; CHECK-FISL-NEXT: xxlnor v2, v2, v2
+; CHECK-FISL-NEXT: xxlnor vs0, v2, v2
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test66:
@@ -2159,20 +2184,21 @@ define <2 x i1> @test67(<2 x i64> %a, <2 x i64> %b) {
; CHECK-FISL-NEXT: stxvd2x v3, 0, r3
; CHECK-FISL-NEXT: addi r3, r1, -48
; CHECK-FISL-NEXT: stxvd2x v2, 0, r3
-; CHECK-FISL-NEXT: ld r4, -24(r1)
-; CHECK-FISL-NEXT: ld r3, -40(r1)
-; CHECK-FISL-NEXT: cmpld r3, r4
-; CHECK-FISL-NEXT: li r4, 0
-; CHECK-FISL-NEXT: li r3, -1
-; CHECK-FISL-NEXT: isellt r5, r3, r4
+; CHECK-FISL-NEXT: ld r3, -24(r1)
+; CHECK-FISL-NEXT: ld r4, -40(r1)
+; CHECK-FISL-NEXT: cmpld r4, r3
+; CHECK-FISL-NEXT: li r3, 0
+; CHECK-FISL-NEXT: li r4, -1
+; CHECK-FISL-NEXT: isellt r5, r4, r3
; CHECK-FISL-NEXT: std r5, -8(r1)
-; CHECK-FISL-NEXT: ld r6, -32(r1)
-; CHECK-FISL-NEXT: ld r5, -48(r1)
-; CHECK-FISL-NEXT: cmpld r5, r6
-; CHECK-FISL-NEXT: isellt r3, r3, r4
+; CHECK-FISL-NEXT: ld r5, -32(r1)
+; CHECK-FISL-NEXT: ld r6, -48(r1)
+; CHECK-FISL-NEXT: cmpld r6, r5
+; CHECK-FISL-NEXT: isellt r3, r4, r3
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x v2, 0, r3
+; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test67:
@@ -2258,15 +2284,15 @@ define <2 x double> @test69(<2 x i16> %a) {
; CHECK-FISL-NEXT: addi r3, r3, .LCPI63_0 at toc@l
; CHECK-FISL-NEXT: lxvw4x v3, 0, r3
; CHECK-FISL-NEXT: vperm v2, v2, v2, v3
-; CHECK-FISL-NEXT: xxlor vs0, v2, v2
; CHECK-FISL-NEXT: addi r3, r1, -32
-; CHECK-FISL-NEXT: stxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: stxvd2x v2, 0, r3
; CHECK-FISL-NEXT: lha r3, -18(r1)
; CHECK-FISL-NEXT: std r3, -8(r1)
; CHECK-FISL-NEXT: lha r3, -26(r1)
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x v2, 0, r3
+; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: xvcvsxddp v2, v2
; CHECK-FISL-NEXT: blr
;
@@ -2336,9 +2362,8 @@ define <2 x double> @test70(<2 x i8> %a) {
; CHECK-FISL-NEXT: addi r3, r3, .LCPI64_0 at toc@l
; CHECK-FISL-NEXT: lxvw4x v3, 0, r3
; CHECK-FISL-NEXT: vperm v2, v2, v2, v3
-; CHECK-FISL-NEXT: xxlor vs0, v2, v2
; CHECK-FISL-NEXT: addi r3, r1, -32
-; CHECK-FISL-NEXT: stxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: stxvd2x v2, 0, r3
; CHECK-FISL-NEXT: ld r3, -24(r1)
; CHECK-FISL-NEXT: extsb r3, r3
; CHECK-FISL-NEXT: std r3, -8(r1)
@@ -2346,7 +2371,8 @@ define <2 x double> @test70(<2 x i8> %a) {
; CHECK-FISL-NEXT: extsb r3, r3
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x v2, 0, r3
+; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
; CHECK-FISL-NEXT: xvcvsxddp v2, v2
; CHECK-FISL-NEXT: blr
;
@@ -2468,16 +2494,16 @@ define double @test82(double %a, double %b, double %c, double %d) {
;
; CHECK-FISL-LABEL: test82:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: stfd f2, -16(r1) # 8-byte Folded Spill
-; CHECK-FISL-NEXT: fmr f2, f1
; CHECK-FISL-NEXT: xscmpudp cr0, f3, f4
; CHECK-FISL-NEXT: stfd f2, -8(r1) # 8-byte Folded Spill
+; CHECK-FISL-NEXT: stfd f1, -16(r1) # 8-byte Folded Spill
; CHECK-FISL-NEXT: beq cr0, .LBB67_2
; CHECK-FISL-NEXT: # %bb.1: # %entry
-; CHECK-FISL-NEXT: lfd f0, -16(r1) # 8-byte Folded Reload
-; CHECK-FISL-NEXT: stfd f0, -8(r1) # 8-byte Folded Spill
+; CHECK-FISL-NEXT: lfd f0, -8(r1) # 8-byte Folded Reload
+; CHECK-FISL-NEXT: stfd f0, -16(r1) # 8-byte Folded Spill
; CHECK-FISL-NEXT: .LBB67_2: # %entry
-; CHECK-FISL-NEXT: lfd f1, -8(r1) # 8-byte Folded Reload
+; CHECK-FISL-NEXT: lfd f0, -16(r1) # 8-byte Folded Reload
+; CHECK-FISL-NEXT: fmr f1, f0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test82:
diff --git a/llvm/test/CodeGen/SPARC/fp16-promote.ll b/llvm/test/CodeGen/SPARC/fp16-promote.ll
index c4ce1cd9fc26..0c402430dadc 100644
--- a/llvm/test/CodeGen/SPARC/fp16-promote.ll
+++ b/llvm/test/CodeGen/SPARC/fp16-promote.ll
@@ -124,12 +124,15 @@ define void @test_fptrunc_float(float %f, half* %p) nounwind {
;
; V8-UNOPT-LABEL: test_fptrunc_float:
; V8-UNOPT: ! %bb.0:
-; V8-UNOPT-NEXT: save %sp, -96, %sp
+; V8-UNOPT-NEXT: save %sp, -104, %sp
+; V8-UNOPT-NEXT: st %i0, [%fp+-4]
+; V8-UNOPT-NEXT: ld [%fp+-4], %f0
; V8-UNOPT-NEXT: mov %i0, %o0
-; V8-UNOPT-NEXT: st %o0, [%fp+-4]
+; V8-UNOPT-NEXT: st %i1, [%fp+-8] ! 4-byte Folded Spill
; V8-UNOPT-NEXT: call __gnu_f2h_ieee
-; V8-UNOPT-NEXT: ld [%fp+-4], %f0
-; V8-UNOPT-NEXT: sth %o0, [%i1]
+; V8-UNOPT-NEXT: st %f0, [%fp+-12]
+; V8-UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload
+; V8-UNOPT-NEXT: sth %o0, [%i0]
; V8-UNOPT-NEXT: ret
; V8-UNOPT-NEXT: restore
;
@@ -173,19 +176,21 @@ define void @test_fptrunc_double(double %d, half* %p) nounwind {
; V8-UNOPT-LABEL: test_fptrunc_double:
; V8-UNOPT: ! %bb.0:
; V8-UNOPT-NEXT: save %sp, -112, %sp
-; V8-UNOPT-NEXT: mov %i1, %i3
+; V8-UNOPT-NEXT: ! implicit-def: $i4_i5
; V8-UNOPT-NEXT: mov %i0, %i4
-; V8-UNOPT-NEXT: ! implicit-def: $i0_i1
-; V8-UNOPT-NEXT: mov %i4, %i0
-; V8-UNOPT-NEXT: mov %i3, %i1
-; V8-UNOPT-NEXT: std %i0, [%fp+-8]
+; V8-UNOPT-NEXT: mov %i1, %i5
+; V8-UNOPT-NEXT: std %i4, [%fp+-8]
; V8-UNOPT-NEXT: ldd [%fp+-8], %f0
; V8-UNOPT-NEXT: std %f0, [%fp+-16]
; V8-UNOPT-NEXT: ldd [%fp+-16], %i0
-; V8-UNOPT-NEXT: mov %i0, %o0
-; V8-UNOPT-NEXT: call __truncdfhf2
+; V8-UNOPT-NEXT: mov %i0, %i3
+; V8-UNOPT-NEXT: ! kill: def $i1 killed $i1 killed $i0_i1
+; V8-UNOPT-NEXT: mov %i3, %o0
; V8-UNOPT-NEXT: mov %i1, %o1
-; V8-UNOPT-NEXT: sth %o0, [%i2]
+; V8-UNOPT-NEXT: call __truncdfhf2
+; V8-UNOPT-NEXT: st %i2, [%fp+-20]
+; V8-UNOPT-NEXT: ld [%fp+-20], %i0 ! 4-byte Folded Reload
+; V8-UNOPT-NEXT: sth %o0, [%i0]
; V8-UNOPT-NEXT: ret
; V8-UNOPT-NEXT: restore
;
@@ -236,18 +241,21 @@ define void @test_fadd(half* %p, half* %q) nounwind {
;
; V8-UNOPT-LABEL: test_fadd:
; V8-UNOPT: ! %bb.0:
-; V8-UNOPT-NEXT: save %sp, -104, %sp
+; V8-UNOPT-NEXT: save %sp, -112, %sp
+; V8-UNOPT-NEXT: lduh [%i0], %o0
+; V8-UNOPT-NEXT: st %i1, [%fp+-8] ! 4-byte Folded Spill
; V8-UNOPT-NEXT: call __gnu_h2f_ieee
+; V8-UNOPT-NEXT: st %i0, [%fp+-12]
+; V8-UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload
; V8-UNOPT-NEXT: lduh [%i0], %o0
-; V8-UNOPT-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
; V8-UNOPT-NEXT: call __gnu_h2f_ieee
-; V8-UNOPT-NEXT: lduh [%i1], %o0
-; V8-UNOPT-NEXT: fmovs %f0, %f1
-; V8-UNOPT-NEXT: ld [%fp+-8], %f0 ! 4-byte Folded Reload
-; V8-UNOPT-NEXT: fadds %f0, %f1, %f0
+; V8-UNOPT-NEXT: st %f0, [%fp+-16]
+; V8-UNOPT-NEXT: ld [%fp+-16], %f1 ! 4-byte Folded Reload
+; V8-UNOPT-NEXT: fadds %f1, %f0, %f0
; V8-UNOPT-NEXT: st %f0, [%fp+-4]
; V8-UNOPT-NEXT: call __gnu_f2h_ieee
; V8-UNOPT-NEXT: ld [%fp+-4], %o0
+; V8-UNOPT-NEXT: ld [%fp+-12], %i0 ! 4-byte Folded Reload
; V8-UNOPT-NEXT: sth %o0, [%i0]
; V8-UNOPT-NEXT: ret
; V8-UNOPT-NEXT: restore
@@ -310,18 +318,21 @@ define void @test_fmul(half* %p, half* %q) nounwind {
;
; V8-UNOPT-LABEL: test_fmul:
; V8-UNOPT: ! %bb.0:
-; V8-UNOPT-NEXT: save %sp, -104, %sp
+; V8-UNOPT-NEXT: save %sp, -112, %sp
+; V8-UNOPT-NEXT: lduh [%i0], %o0
+; V8-UNOPT-NEXT: st %i1, [%fp+-8] ! 4-byte Folded Spill
; V8-UNOPT-NEXT: call __gnu_h2f_ieee
+; V8-UNOPT-NEXT: st %i0, [%fp+-12]
+; V8-UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload
; V8-UNOPT-NEXT: lduh [%i0], %o0
-; V8-UNOPT-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
; V8-UNOPT-NEXT: call __gnu_h2f_ieee
-; V8-UNOPT-NEXT: lduh [%i1], %o0
-; V8-UNOPT-NEXT: fmovs %f0, %f1
-; V8-UNOPT-NEXT: ld [%fp+-8], %f0 ! 4-byte Folded Reload
-; V8-UNOPT-NEXT: fmuls %f0, %f1, %f0
+; V8-UNOPT-NEXT: st %f0, [%fp+-16]
+; V8-UNOPT-NEXT: ld [%fp+-16], %f1 ! 4-byte Folded Reload
+; V8-UNOPT-NEXT: fmuls %f1, %f0, %f0
; V8-UNOPT-NEXT: st %f0, [%fp+-4]
; V8-UNOPT-NEXT: call __gnu_f2h_ieee
; V8-UNOPT-NEXT: ld [%fp+-4], %o0
+; V8-UNOPT-NEXT: ld [%fp+-12], %i0 ! 4-byte Folded Reload
; V8-UNOPT-NEXT: sth %o0, [%i0]
; V8-UNOPT-NEXT: ret
; V8-UNOPT-NEXT: restore
diff --git a/llvm/test/CodeGen/SystemZ/swift-return.ll b/llvm/test/CodeGen/SystemZ/swift-return.ll
index 4bbdbcffd527..84e257f93218 100644
--- a/llvm/test/CodeGen/SystemZ/swift-return.ll
+++ b/llvm/test/CodeGen/SystemZ/swift-return.ll
@@ -14,9 +14,10 @@
; CHECK-O0-LABEL: test
; CHECK-O0: st %r2
; CHECK-O0: brasl %r14, gen
-; CHECK-O0-DAG: lhr %r2, %r2
+; CHECK-O0-DAG: lhr %[[REG1:r[0-9]+]], %r2
; CHECK-O0-DAG: lbr %[[REG2:r[0-9]+]], %r3
-; CHECK-O0: ar %r2, %[[REG2]]
+; CHECK-O0: ar %[[REG1]], %[[REG2]]
+; CHECK-O0: lr %r2, %[[REG1]]
define i16 @test(i32 %key) {
entry:
%key.addr = alloca i32, align 4
@@ -60,6 +61,7 @@ declare swiftcc { i16, i8 } @gen(i32)
; CHECK-O0: ar
; CHECK-O0: ar
; CHECK-O0: ar
+; CHECK-O0: lr %r2
define i32 @test2(i32 %key) #0 {
entry:
%key.addr = alloca i32, align 4
diff --git a/llvm/test/CodeGen/SystemZ/swifterror.ll b/llvm/test/CodeGen/SystemZ/swifterror.ll
index d8fe608582c9..76b1e02ebe22 100644
--- a/llvm/test/CodeGen/SystemZ/swifterror.ll
+++ b/llvm/test/CodeGen/SystemZ/swifterror.ll
@@ -16,7 +16,7 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) {
; CHECK-O0-LABEL: foo:
; CHECK-O0: lghi %r2, 16
; CHECK-O0: brasl %r14, malloc
-; CHECK-O0: lgr [[T0:%r[0-9]+]], %r2
+; CHECK-O0: lgr %r0, %r2
; CHECK-O0: mvi 8(%r2), 1
entry:
%call = call i8* @malloc(i64 16)
@@ -118,17 +118,19 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
; CHECK-NOT: %r9
; CHECK: br %r14
; CHECK-O0-LABEL: foo_if:
+; CHECK-O0: chi %r2, 0
; spill to stack
; CHECK-O0: stg %r9, [[OFFS:[0-9]+]](%r15)
-; CHECK-O0: chi %r2, 0
; CHECK-O0: je
; CHECK-O0: lghi %r2, 16
; CHECK-O0: brasl %r14, malloc
-; CHECK-O0: lgr %r9, %r2
+; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2
; CHECK-O0: mvi 8(%r2), 1
+; CHECK-O0: lgr %r9, %r[[REG1]]
; CHECK-O0: br %r14
; reload from stack
-; CHECK-O0: lg %r9, [[OFFS]](%r15)
+; CHECK-O0: lg %r[[REG2:[0-9]+]], [[OFFS]](%r15)
+; CHECK-O0: lgr %r9, %r[[REG2]]
; CHECK-O0: br %r14
entry:
%cond = icmp ne i32 %cc, 0
@@ -167,10 +169,11 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
; CHECK-O0: lghi %r2, 16
; CHECK-O0: brasl %r14, malloc
; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2
-; CHECK-O0: mvi 8(%r[[REG1]]), 1
+; CHECK-O0: mvi 8(%r2), 1
; CHECK-O0: jnh
; reload from stack
-; CHECK-O0: lg %r9, [[OFFS:[0-9]+]](%r15)
+; CHECK-O0: lg %r[[REG2:[0-9]+]], [[OFFS:[0-9]+]](%r15)
+; CHECK-O0: lgr %r9, %r[[REG2]]
; CHECK-O0: br %r14
entry:
br label %bb_loop
@@ -211,17 +214,18 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi
; CHECK: br %r14
; CHECK-O0-LABEL: foo_sret:
+; CHECK-O0: lghi %r{{.*}}, 16
; spill sret to stack
-; CHECK-O0-DAG: stg %r2, [[OFFS1:[0-9]+]](%r15)
-; CHECK-O0-DAG: st %r3, [[OFFS2:[0-9]+]](%r15)
-; CHECK-O0: lghi %r2, 16
+; CHECK-O0: stg %r2, [[OFFS1:[0-9]+]](%r15)
+; CHECK-O0: lgr %r2, %r{{.*}}
+; CHECK-O0: st %r3, [[OFFS2:[0-9]+]](%r15)
; CHECK-O0: brasl %r14, malloc
-; CHECK-O0-DAG: lgr %r[[REG3:[0-9]+]], %r2
-; CHECK-O0-DAG: mvi 8(%r[[REG3]]), 1
+; CHECK-O0: lgr {{.*}}, %r2
+; CHECK-O0: mvi 8(%r2), 1
; CHECK-O0-DAG: lg %r[[REG1:[0-9]+]], [[OFFS1]](%r15)
-; CHECK-O0-DAG: lgr %r9, %r[[REG3]]
; CHECK-O0-DAG: l %r[[REG2:[0-9]+]], [[OFFS2]](%r15)
; CHECK-O0: st %r[[REG2]], 4(%r[[REG1]])
+; CHECK-O0: lgr %r9, {{.*}}
; CHECK-O0: br %r14
entry:
%call = call i8* @malloc(i64 16)
@@ -251,6 +255,8 @@ define float @caller3(i8* %error_ref) {
; CHECK-O0-LABEL: caller3:
; CHECK-O0: lghi %r9, 0
; CHECK-O0: lhi %r3, 1
+; CHECK-O0: stg %r2, {{.*}}(%r15)
+; CHECK-O0: lgr %r2, {{.*}}
; CHECK-O0: brasl %r14, foo_sret
; CHECK-O0: lgr {{.*}}, %r9
; CHECK-O0: cghi %r9, 0
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll
index 94a1ec9380fb..38a7e1dbba19 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll
@@ -404,7 +404,7 @@ for.cond.cleanup:
}
; CHECK-MID: check_negated_xor_wls
-; CHECK-MID: t2WhileLoopStart renamable $r2, %bb.3
+; CHECK-MID: t2WhileLoopStart killed renamable $r2, %bb.3
; CHECK-MID: tB %bb.1
; CHECK-MID: bb.1.while.body.preheader:
; CHECK-MID: $lr = t2LoopDec killed renamable $lr, 1
@@ -437,7 +437,7 @@ while.end:
}
; CHECK-MID: check_negated_cmp_wls
-; CHECK-MID: t2WhileLoopStart renamable $r2, %bb.3
+; CHECK-MID: t2WhileLoopStart killed renamable $r2, %bb.3
; CHECK-MID: tB %bb.1
; CHECK-MID: bb.1.while.body.preheader:
; CHECK-MID: $lr = t2LoopDec killed renamable $lr, 1
diff --git a/llvm/test/CodeGen/Thumb2/high-reg-spill.mir b/llvm/test/CodeGen/Thumb2/high-reg-spill.mir
index 4e2197c0f0f0..ace7a38ec10b 100644
--- a/llvm/test/CodeGen/Thumb2/high-reg-spill.mir
+++ b/llvm/test/CodeGen/Thumb2/high-reg-spill.mir
@@ -38,8 +38,10 @@ body: |
bb.0.entry:
; CHECK-LABEL: name: constraint_h
; CHECK: renamable $r0 = tLDRspi %stack.0.i, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from %ir.i)
- ; CHECK: renamable $r8 = COPY killed renamable $r0
- ; CHECK: INLINEASM &"@ $0", 1 /* sideeffect attdialect */, 589833 /* reguse:GPRnopc */, killed renamable $r8, 12 /* clobber */, implicit-def dead early-clobber $r12
+ ; CHECK: renamable $r12 = COPY killed renamable $r0
+ ; CHECK: t2STRi12 killed $r12, %stack.1, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.1)
+ ; CHECK: $r8 = t2LDRi12 %stack.1, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.1)
+ ; CHECK: INLINEASM &"@ $0", 1 /* sideeffect attdialect */, 589833 /* reguse:GPRnopc */, renamable $r8, 12 /* clobber */, implicit-def early-clobber $r12
; CHECK: tBX_RET 14 /* CC::al */, $noreg
%1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i)
%0:hgpr = COPY %1
diff --git a/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll b/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll
index 647ad2e8182e..3a33825a0b0d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll
@@ -10,11 +10,13 @@ define arm_aapcs_vfpcc void @spill_vector_i32(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov q1, q0
+; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: add sp, #40
; CHECK-NEXT: pop {r7, pc}
@@ -31,11 +33,13 @@ define arm_aapcs_vfpcc void @spill_vector_i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov q1, q0
+; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: add sp, #40
; CHECK-NEXT: pop {r7, pc}
@@ -52,11 +56,13 @@ define arm_aapcs_vfpcc void @spill_vector_i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov q1, q0
+; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: vstrb.8 q0, [r0]
; CHECK-NEXT: add sp, #40
; CHECK-NEXT: pop {r7, pc}
@@ -73,11 +79,13 @@ define arm_aapcs_vfpcc void @spill_vector_i64(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov q1, q0
+; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: add sp, #40
; CHECK-NEXT: pop {r7, pc}
@@ -94,11 +102,13 @@ define arm_aapcs_vfpcc void @spill_vector_f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov q1, q0
+; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: add sp, #40
; CHECK-NEXT: pop {r7, pc}
@@ -115,11 +125,13 @@ define arm_aapcs_vfpcc void @spill_vector_f16(<8 x half> %v, <8 x half>* %p) {
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov q1, q0
+; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: add sp, #40
; CHECK-NEXT: pop {r7, pc}
@@ -134,15 +146,15 @@ define arm_aapcs_vfpcc void @spill_vector_f64(<2 x double> %v, <2 x double>* %p)
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
-; CHECK-NEXT: add sp, #40
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop {r7, pc}
entry:
call void @external_function()
diff --git a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
index 3f4af707ff82..b5635c7e0f06 100644
--- a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
+++ b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -20,15 +20,18 @@ define i32 @z() nounwind ssp {
; CHECK-NEXT: movb $15, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movl $8, %ecx
+; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi
-; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: movl %eax, %edi
+; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: rep;movsl (%esi), %es:(%edi)
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: addl $36, %ecx
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; CHECK-NEXT: movl %eax, %edi
-; CHECK-NEXT: addl $36, %edi
+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: rep;movsl (%esi), %es:(%edi)
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
; CHECK-NEXT: movb %cl, 32(%eax)
@@ -39,11 +42,11 @@ define i32 @z() nounwind ssp {
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: ## %bb.1: ## %return
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %ecx
+; CHECK-NEXT: movl (%ecx), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: cmpl %edx, %ecx
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %eax
-; CHECK-NEXT: movl (%eax), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: cmpl %ecx, %eax
; CHECK-NEXT: jne LBB0_3
; CHECK-NEXT: ## %bb.2: ## %SP_return
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll b/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
index 54540f3e6538..6a43e864e965 100644
--- a/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
+++ b/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
@@ -10,8 +10,12 @@ target triple = "i386-apple-darwin10"
define i32 @func(i8* %s) nounwind ssp {
; CHECK-LABEL: func:
; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; CHECK-NEXT: movl (%esp), %ecx ## 4-byte Reload
; CHECK-NEXT: ## InlineAsm Start
; CHECK-NEXT: arg0 %eax
; CHECK-NEXT: arg1 %ecx
@@ -19,7 +23,10 @@ define i32 @func(i8* %s) nounwind ssp {
; CHECK-NEXT: arg3 %esi
; CHECK-NEXT: arg4 %ecx
; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: movl %ecx, %edi
+; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
; CHECK-NEXT: retl
entry:
%0 = tail call %asmtype asm "arg0 $0\0A\09arg1 $1\0A\09arg2 $2\0A\09arg3 $3\0A\09arg4 $4", "={ax},=r,=r,=r,1,~{dirflag},~{fpsr},~{flags}"(i8* %s) nounwind, !srcloc !0 ; <%0> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll b/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
index 70228bb47f4d..5ef867d4f9dc 100644
--- a/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
+++ b/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
@@ -15,20 +15,20 @@
; CHECK-LABEL: @test_bitcast
; Load the value of the function pointer: %loaded_ptr
+; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]]
; Spill %arg2.
; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]]
-; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]]
; Spill %loaded_ptr.
; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]]
; Perform the indirect call.
-; Load the function pointer.
-; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]]
-; Load the third argument
-; CHECK: movq [[ARG2_SLOT]], %rdx
; Load the first argument
-; CHECK: movq %rdx, %rdi
+; CHECK: movq [[ARG2_SLOT]], %rdi
; Load the second argument
-; CHECK: movq %rdx, %rsi
+; CHECK: movq [[ARG2_SLOT]], %rsi
+; Load the third argument
+; CHECK: movq [[ARG2_SLOT]], %rdx
+; Load the function pointer.
+; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]]
; Call.
; CHECK: callq *[[FCT_PTR]]
; CHECK: ret
@@ -54,20 +54,20 @@ label_end:
; CHECK-LABEL: @test_inttoptr
; Load the value of the function pointer: %loaded_ptr
-; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]]
-; Spill %loaded_ptr.
; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]]
; Spill %arg2.
+; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]]
+; Spill %loaded_ptr.
; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]]
; Perform the indirect call.
-; Load the function pointer.
-; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]]
-; Load the third argument
-; CHECK: movq [[ARG2_SLOT]], %rdx
; Load the first argument
-; CHECK: movq %rdx, %rdi
+; CHECK: movq [[ARG2_SLOT]], %rdi
; Load the second argument
-; CHECK: movq %rdx, %rsi
+; CHECK: movq [[ARG2_SLOT]], %rsi
+; Load the third argument
+; CHECK: movq [[ARG2_SLOT]], %rdx
+; Load the function pointer.
+; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]]
; Call.
; CHECK: callq *[[FCT_PTR]]
; CHECK: ret
@@ -92,21 +92,21 @@ label_end:
}
; CHECK-LABEL: @test_ptrtoint
-; Spill %arg2.
-; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]]
; Load the value of the function pointer: %loaded_ptr
; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]]
+; Spill %arg2.
+; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]]
; Spill %loaded_ptr.
; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]]
; Perform the indirect call.
-; Load the function pointer.
-; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]]
-; Load the third argument
-; CHECK: movq [[ARG2_SLOT]], %rdx
; Load the first argument
-; CHECK: movq %rdx, %rdi
+; CHECK: movq [[ARG2_SLOT]], %rdi
; Load the second argument
-; CHECK: movq %rdx, %rsi
+; CHECK: movq [[ARG2_SLOT]], %rsi
+; Load the third argument
+; CHECK: movq [[ARG2_SLOT]], %rdx
+; Load the function pointer.
+; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]]
; Call.
; CHECK: callq *[[FCT_PTR]]
; CHECK: ret
diff --git a/llvm/test/CodeGen/X86/atomic-monotonic.ll b/llvm/test/CodeGen/X86/atomic-monotonic.ll
index b1eecdfdc0b2..9cab2d7d2b25 100644
--- a/llvm/test/CodeGen/X86/atomic-monotonic.ll
+++ b/llvm/test/CodeGen/X86/atomic-monotonic.ll
@@ -14,8 +14,8 @@ define i8 @load_i8(i8* %ptr) {
define void @store_i8(i8* %ptr, i8 %v) {
; CHECK-O0-LABEL: store_i8:
; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movb %sil, %al
-; CHECK-O0-NEXT: movb %al, (%rdi)
+; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $esi
+; CHECK-O0-NEXT: movb %sil, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i8:
@@ -44,8 +44,8 @@ define i16 @load_i16(i16* %ptr) {
define void @store_i16(i16* %ptr, i16 %v) {
; CHECK-O0-LABEL: store_i16:
; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movw %si, %ax
-; CHECK-O0-NEXT: movw %ax, (%rdi)
+; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi
+; CHECK-O0-NEXT: movw %si, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i16:
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index 7b255c7b6c1a..7a1f34c65c18 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -16,8 +16,8 @@ define i8 @load_i8(i8* %ptr) {
define void @store_i8(i8* %ptr, i8 %v) {
; CHECK-O0-LABEL: store_i8:
; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movb %sil, %al
-; CHECK-O0-NEXT: movb %al, (%rdi)
+; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $esi
+; CHECK-O0-NEXT: movb %sil, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i8:
@@ -46,8 +46,8 @@ define i16 @load_i16(i16* %ptr) {
define void @store_i16(i16* %ptr, i16 %v) {
; CHECK-O0-LABEL: store_i16:
; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movw %si, %ax
-; CHECK-O0-NEXT: movw %ax, (%rdi)
+; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi
+; CHECK-O0-NEXT: movw %si, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i16:
@@ -231,10 +231,11 @@ define i128 @load_i128(i128* %ptr) {
; CHECK-O0-NEXT: .cfi_def_cfa_offset 16
; CHECK-O0-NEXT: .cfi_offset %rbx, -16
; CHECK-O0-NEXT: xorl %eax, %eax
-; CHECK-O0-NEXT: movl %eax, %ebx
-; CHECK-O0-NEXT: movq %rbx, %rax
-; CHECK-O0-NEXT: movq %rbx, %rdx
-; CHECK-O0-NEXT: movq %rbx, %rcx
+; CHECK-O0-NEXT: # kill: def $rax killed $eax
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
; CHECK-O0-NEXT: lock cmpxchg16b (%rdi)
; CHECK-O0-NEXT: popq %rbx
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
@@ -263,24 +264,24 @@ define void @store_i128(i128* %ptr, i128 %v) {
; CHECK-O0-NEXT: pushq %rbx
; CHECK-O0-NEXT: .cfi_def_cfa_offset 16
; CHECK-O0-NEXT: .cfi_offset %rbx, -16
+; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movq 8(%rdi), %rcx
; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: movq 8(%rdi), %rdx
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: jmp .LBB16_1
; CHECK-O0-NEXT: .LBB16_1: # %atomicrmw.start
; CHECK-O0-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; CHECK-O0-NEXT: lock cmpxchg16b (%rsi)
-; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: jne .LBB16_1
; CHECK-O0-NEXT: jmp .LBB16_2
; CHECK-O0-NEXT: .LBB16_2: # %atomicrmw.end
@@ -316,22 +317,24 @@ define i256 @load_i256(i256* %ptr) {
; CHECK-O0-NEXT: subq $56, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 64
; CHECK-O0-NEXT: movq %rdi, %rax
-; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-O0-NEXT: movl $32, %edi
+; CHECK-O0-NEXT: movl $32, %ecx
; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-O0-NEXT: xorl %ecx, %ecx
+; CHECK-O0-NEXT: xorl %r8d, %r8d
+; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rcx, %rdi
+; CHECK-O0-NEXT: movl %r8d, %ecx
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: callq __atomic_load
-; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi
-; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %r8
-; CHECK-O0-NEXT: movq %r8, 24(%rdi)
-; CHECK-O0-NEXT: movq %rsi, 16(%rdi)
-; CHECK-O0-NEXT: movq %rdx, 8(%rdi)
-; CHECK-O0-NEXT: movq %rcx, (%rdi)
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; CHECK-O0-NEXT: movq %rsi, 24(%rdi)
+; CHECK-O0-NEXT: movq %rdx, 16(%rdi)
+; CHECK-O0-NEXT: movq %rcx, 8(%rdi)
+; CHECK-O0-NEXT: movq %rax, (%rdi)
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT: addq $56, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
@@ -366,18 +369,18 @@ define void @store_i256(i256* %ptr, i256 %v) {
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: subq $40, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 48
-; CHECK-O0-NEXT: movq %rcx, %rax
-; CHECK-O0-NEXT: movq %rdx, (%rsp) # 8-byte Spill
-; CHECK-O0-NEXT: movq %rsi, %r9
-; CHECK-O0-NEXT: movq %rdi, %rsi
-; CHECK-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload
-; CHECK-O0-NEXT: xorl %ecx, %ecx
-; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-O0-NEXT: movq %r9, {{[0-9]+}}(%rsp)
-; CHECK-O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
-; CHECK-O0-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-O0-NEXT: xorl %eax, %eax
+; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %r9
+; CHECK-O0-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp)
-; CHECK-O0-NEXT: movl $32, %edi
+; CHECK-O0-NEXT: movl $32, %ecx
+; CHECK-O0-NEXT: movq %rdi, (%rsp) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rcx, %rdi
+; CHECK-O0-NEXT: movq (%rsp), %rsi # 8-byte Reload
+; CHECK-O0-NEXT: movq %r9, %rdx
+; CHECK-O0-NEXT: movl %eax, %ecx
; CHECK-O0-NEXT: callq __atomic_store
; CHECK-O0-NEXT: addq $40, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
@@ -408,10 +411,10 @@ define void @store_i256(i256* %ptr, i256 %v) {
define void @vec_store(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-CUR-LABEL: vec_store:
; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx
-; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax
-; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi)
-; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi)
+; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax
+; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %ecx
+; CHECK-O0-CUR-NEXT: movl %eax, (%rdi)
+; CHECK-O0-CUR-NEXT: movl %ecx, 4(%rdi)
; CHECK-O0-CUR-NEXT: retq
;
; CHECK-O3-CUR-LABEL: vec_store:
@@ -445,10 +448,10 @@ define void @vec_store(i32* %p0, <2 x i32> %vec) {
define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-CUR-LABEL: vec_store_unaligned:
; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx
-; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax
-; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi)
-; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi)
+; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax
+; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %ecx
+; CHECK-O0-CUR-NEXT: movl %eax, (%rdi)
+; CHECK-O0-CUR-NEXT: movl %ecx, 4(%rdi)
; CHECK-O0-CUR-NEXT: retq
;
; CHECK-O3-CUR-LABEL: vec_store_unaligned:
@@ -591,11 +594,17 @@ define i64 @load_fold_add1(i64* %p) {
}
define i64 @load_fold_add2(i64* %p, i64 %v2) {
-; CHECK-LABEL: load_fold_add2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: addq (%rdi), %rax
-; CHECK-NEXT: retq
+; CHECK-O0-LABEL: load_fold_add2:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: addq (%rdi), %rsi
+; CHECK-O0-NEXT: movq %rsi, %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_fold_add2:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq %rsi, %rax
+; CHECK-O3-NEXT: addq (%rdi), %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = add i64 %v, %v2
ret i64 %ret
@@ -685,11 +694,17 @@ define i64 @load_fold_mul1(i64* %p) {
}
define i64 @load_fold_mul2(i64* %p, i64 %v2) {
-; CHECK-LABEL: load_fold_mul2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: imulq (%rdi), %rax
-; CHECK-NEXT: retq
+; CHECK-O0-LABEL: load_fold_mul2:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: imulq (%rdi), %rsi
+; CHECK-O0-NEXT: movq %rsi, %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_fold_mul2:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq %rsi, %rax
+; CHECK-O3-NEXT: imulq (%rdi), %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = mul i64 %v, %v2
ret i64 %ret
@@ -1114,8 +1129,8 @@ define i64 @load_fold_shl1(i64* %p) {
define i64 @load_fold_shl2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_shl2:
; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O0-NEXT: # kill: def $cl killed $rcx
; CHECK-O0-NEXT: shlq %cl, %rax
; CHECK-O0-NEXT: retq
@@ -1164,8 +1179,8 @@ define i64 @load_fold_lshr1(i64* %p) {
define i64 @load_fold_lshr2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_lshr2:
; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O0-NEXT: # kill: def $cl killed $rcx
; CHECK-O0-NEXT: shrq %cl, %rax
; CHECK-O0-NEXT: retq
@@ -1214,8 +1229,8 @@ define i64 @load_fold_ashr1(i64* %p) {
define i64 @load_fold_ashr2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_ashr2:
; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O0-NEXT: # kill: def $cl killed $rcx
; CHECK-O0-NEXT: sarq %cl, %rax
; CHECK-O0-NEXT: retq
@@ -1268,11 +1283,17 @@ define i64 @load_fold_and1(i64* %p) {
}
define i64 @load_fold_and2(i64* %p, i64 %v2) {
-; CHECK-LABEL: load_fold_and2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: andq (%rdi), %rax
-; CHECK-NEXT: retq
+; CHECK-O0-LABEL: load_fold_and2:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: andq (%rdi), %rsi
+; CHECK-O0-NEXT: movq %rsi, %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_fold_and2:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq %rsi, %rax
+; CHECK-O3-NEXT: andq (%rdi), %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = and i64 %v, %v2
ret i64 %ret
@@ -1315,11 +1336,17 @@ define i64 @load_fold_or1(i64* %p) {
}
define i64 @load_fold_or2(i64* %p, i64 %v2) {
-; CHECK-LABEL: load_fold_or2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: orq (%rdi), %rax
-; CHECK-NEXT: retq
+; CHECK-O0-LABEL: load_fold_or2:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: orq (%rdi), %rsi
+; CHECK-O0-NEXT: movq %rsi, %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_fold_or2:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq %rsi, %rax
+; CHECK-O3-NEXT: orq (%rdi), %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = or i64 %v, %v2
ret i64 %ret
@@ -1362,11 +1389,17 @@ define i64 @load_fold_xor1(i64* %p) {
}
define i64 @load_fold_xor2(i64* %p, i64 %v2) {
-; CHECK-LABEL: load_fold_xor2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: xorq (%rdi), %rax
-; CHECK-NEXT: retq
+; CHECK-O0-LABEL: load_fold_xor2:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: xorq (%rdi), %rsi
+; CHECK-O0-NEXT: movq %rsi, %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_fold_xor2:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq %rsi, %rax
+; CHECK-O3-NEXT: xorq (%rdi), %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = xor i64 %v, %v2
ret i64 %ret
@@ -1401,7 +1434,9 @@ define i1 @load_fold_icmp1(i64* %p) {
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: subq $15, %rax
-; CHECK-O0-NEXT: sete %al
+; CHECK-O0-NEXT: sete %cl
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movb %cl, %al
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_icmp1:
@@ -1419,7 +1454,9 @@ define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: subq %rsi, %rax
-; CHECK-O0-NEXT: sete %al
+; CHECK-O0-NEXT: sete %cl
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movb %cl, %al
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_icmp2:
@@ -1438,7 +1475,9 @@ define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq (%rsi), %rcx
; CHECK-O0-NEXT: subq %rcx, %rax
-; CHECK-O0-NEXT: sete %al
+; CHECK-O0-NEXT: sete %cl
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movb %cl, %al
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_fold_icmp3:
@@ -1581,17 +1620,17 @@ define void @rmw_fold_mul2(i64* %p, i64 %v) {
define void @rmw_fold_sdiv1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sdiv1:
; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq (%rdi), %rcx
-; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-O0-NEXT: movq %rcx, %rax
-; CHECK-O0-NEXT: imulq %rdx
-; CHECK-O0-NEXT: movq %rdx, %rax
-; CHECK-O0-NEXT: addq %rcx, %rax
-; CHECK-O0-NEXT: movq %rax, %rcx
+; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: imulq %rcx
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-O0-NEXT: addq %rax, %rdx
+; CHECK-O0-NEXT: movq %rdx, %rcx
; CHECK-O0-NEXT: shrq $63, %rcx
-; CHECK-O0-NEXT: sarq $3, %rax
-; CHECK-O0-NEXT: addq %rcx, %rax
-; CHECK-O0-NEXT: movq %rax, (%rdi)
+; CHECK-O0-NEXT: sarq $3, %rdx
+; CHECK-O0-NEXT: addq %rcx, %rdx
+; CHECK-O0-NEXT: movq %rdx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_sdiv1:
@@ -1722,17 +1761,16 @@ define void @rmw_fold_srem1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_srem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: imulq %rcx
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-O0-NEXT: addq %rax, %rdx
; CHECK-O0-NEXT: movq %rdx, %rcx
-; CHECK-O0-NEXT: addq %rax, %rcx
-; CHECK-O0-NEXT: movq %rcx, %rdx
-; CHECK-O0-NEXT: shrq $63, %rdx
-; CHECK-O0-NEXT: sarq $3, %rcx
-; CHECK-O0-NEXT: addq %rdx, %rcx
-; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx
+; CHECK-O0-NEXT: shrq $63, %rcx
+; CHECK-O0-NEXT: sarq $3, %rdx
+; CHECK-O0-NEXT: addq %rcx, %rdx
+; CHECK-O0-NEXT: leaq (%rdx,%rdx,4), %rcx
; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx
; CHECK-O0-NEXT: subq %rcx, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
@@ -1894,9 +1932,9 @@ define void @rmw_fold_shl2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: movb %sil, %dl
+; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi
; CHECK-O0-NEXT: # implicit-def: $rcx
-; CHECK-O0-NEXT: movb %dl, %cl
+; CHECK-O0-NEXT: movb %sil, %cl
; CHECK-O0-NEXT: shlxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
@@ -1950,9 +1988,9 @@ define void @rmw_fold_lshr2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: movb %sil, %dl
+; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi
; CHECK-O0-NEXT: # implicit-def: $rcx
-; CHECK-O0-NEXT: movb %dl, %cl
+; CHECK-O0-NEXT: movb %sil, %cl
; CHECK-O0-NEXT: shrxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
@@ -2006,9 +2044,9 @@ define void @rmw_fold_ashr2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: movb %sil, %dl
+; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi
; CHECK-O0-NEXT: # implicit-def: $rcx
-; CHECK-O0-NEXT: movb %dl, %cl
+; CHECK-O0-NEXT: movb %sil, %cl
; CHECK-O0-NEXT: sarxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
@@ -2230,12 +2268,12 @@ define i32 @fold_trunc_or(i64* %p, i32 %v2) {
define i32 @split_load(i64* %p) {
; CHECK-O0-LABEL: split_load:
; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq (%rdi), %rcx
-; CHECK-O0-NEXT: movb %cl, %al
-; CHECK-O0-NEXT: shrq $32, %rcx
-; CHECK-O0-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O0-NEXT: orb %cl, %al
-; CHECK-O0-NEXT: movzbl %al, %eax
+; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movb %al, %cl
+; CHECK-O0-NEXT: shrq $32, %rax
+; CHECK-O0-NEXT: # kill: def $al killed $al killed $rax
+; CHECK-O0-NEXT: orb %al, %cl
+; CHECK-O0-NEXT: movzbl %cl, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: split_load:
@@ -2373,8 +2411,8 @@ define i64 @nofold_stfence(i64* %p) {
define i64 @fold_constant(i64 %arg) {
; CHECK-O0-LABEL: fold_constant:
; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: addq Constant, %rdi
; CHECK-O0-NEXT: movq %rdi, %rax
-; CHECK-O0-NEXT: addq Constant, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_constant:
@@ -2564,9 +2602,10 @@ define i32 @load_i8_anyext_i32(i8* %ptr) {
define i32 @load_i16_anyext_i32(i16* %ptr) {
; CHECK-O0-CUR-LABEL: load_i16_anyext_i32:
; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: movw (%rdi), %cx
-; CHECK-O0-CUR-NEXT: # implicit-def: $eax
-; CHECK-O0-CUR-NEXT: movw %cx, %ax
+; CHECK-O0-CUR-NEXT: movw (%rdi), %ax
+; CHECK-O0-CUR-NEXT: # implicit-def: $ecx
+; CHECK-O0-CUR-NEXT: movw %ax, %cx
+; CHECK-O0-CUR-NEXT: movl %ecx, %eax
; CHECK-O0-CUR-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_i16_anyext_i32:
@@ -2594,10 +2633,10 @@ define i32 @load_i16_anyext_i32(i16* %ptr) {
define i64 @load_i16_anyext_i64(i16* %ptr) {
; CHECK-O0-CUR-LABEL: load_i16_anyext_i64:
; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: movw (%rdi), %cx
-; CHECK-O0-CUR-NEXT: # implicit-def: $eax
-; CHECK-O0-CUR-NEXT: movw %cx, %ax
-; CHECK-O0-CUR-NEXT: vmovd %eax, %xmm0
+; CHECK-O0-CUR-NEXT: movw (%rdi), %ax
+; CHECK-O0-CUR-NEXT: # implicit-def: $ecx
+; CHECK-O0-CUR-NEXT: movw %ax, %cx
+; CHECK-O0-CUR-NEXT: vmovd %ecx, %xmm0
; CHECK-O0-CUR-NEXT: vmovq %xmm0, %rax
; CHECK-O0-CUR-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/atomic32.ll b/llvm/test/CodeGen/X86/atomic32.ll
index 022aa38a4554..05a10966a4f1 100644
--- a/llvm/test/CodeGen/X86/atomic32.ll
+++ b/llvm/test/CodeGen/X86/atomic32.ll
@@ -71,8 +71,9 @@ define void @atomic_fetch_and32() nounwind {
; X64-NEXT: andl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %cl
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: testb $1, %cl
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB2_2
; X64-NEXT: jmp .LBB2_1
@@ -94,9 +95,10 @@ define void @atomic_fetch_and32() nounwind {
; X86-NEXT: andl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
; X86-NEXT: sete %cl
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: testb $1, %cl
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: jne .LBB2_2
; X86-NEXT: jmp .LBB2_1
; X86-NEXT: .LBB2_2: # %atomicrmw.end
@@ -123,8 +125,9 @@ define void @atomic_fetch_or32() nounwind {
; X64-NEXT: orl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %cl
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: testb $1, %cl
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB3_2
; X64-NEXT: jmp .LBB3_1
@@ -146,9 +149,10 @@ define void @atomic_fetch_or32() nounwind {
; X86-NEXT: orl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
; X86-NEXT: sete %cl
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: testb $1, %cl
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: jne .LBB3_2
; X86-NEXT: jmp .LBB3_1
; X86-NEXT: .LBB3_2: # %atomicrmw.end
@@ -175,8 +179,9 @@ define void @atomic_fetch_xor32() nounwind {
; X64-NEXT: xorl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %cl
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: testb $1, %cl
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB4_2
; X64-NEXT: jmp .LBB4_1
@@ -198,9 +203,10 @@ define void @atomic_fetch_xor32() nounwind {
; X86-NEXT: xorl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
; X86-NEXT: sete %cl
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: testb $1, %cl
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: jne .LBB4_2
; X86-NEXT: jmp .LBB4_1
; X86-NEXT: .LBB4_2: # %atomicrmw.end
@@ -217,16 +223,15 @@ define void @atomic_fetch_xor32() nounwind {
define void @atomic_fetch_nand32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_nand32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl sc32, %eax
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB5_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: andl %edx, %ecx
-; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: notl %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %cl
@@ -239,27 +244,26 @@ define void @atomic_fetch_nand32(i32 %x) nounwind {
;
; X86-LABEL: atomic_fetch_nand32:
; X86: # %bb.0:
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl sc32, %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl sc32, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT: .LBB5_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: andl %edx, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT: notl %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
; X86-NEXT: sete %cl
; X86-NEXT: testb $1, %cl
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: jne .LBB5_2
; X86-NEXT: jmp .LBB5_1
; X86-NEXT: .LBB5_2: # %atomicrmw.end
-; X86-NEXT: addl $12, %esp
+; X86-NEXT: addl $8, %esp
; X86-NEXT: retl
%t1 = atomicrmw nand i32* @sc32, i32 %x acquire
ret void
@@ -268,21 +272,21 @@ define void @atomic_fetch_nand32(i32 %x) nounwind {
define void @atomic_fetch_max32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_max32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl sc32, %eax
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB6_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: subl %ecx, %edx
-; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; X64-NEXT: cmovgl %eax, %ecx
-; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
+; X64-NEXT: subl %edx, %ecx
+; X64-NEXT: cmovgl %eax, %edx
+; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB6_2
; X64-NEXT: jmp .LBB6_1
; X64-NEXT: .LBB6_2: # %atomicrmw.end
@@ -292,21 +296,21 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-CMOV-NEXT: movl sc32, %eax
-; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB6_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-CMOV-NEXT: movl %eax, %edx
-; X86-CMOV-NEXT: subl %ecx, %edx
-; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-CMOV-NEXT: cmovgl %eax, %ecx
-; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32
-; X86-CMOV-NEXT: sete %cl
-; X86-CMOV-NEXT: testb $1, %cl
+; X86-CMOV-NEXT: movl %eax, %ecx
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-CMOV-NEXT: subl %edx, %ecx
+; X86-CMOV-NEXT: cmovgl %eax, %edx
+; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
+; X86-CMOV-NEXT: sete %dl
+; X86-CMOV-NEXT: testb $1, %dl
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB6_2
; X86-CMOV-NEXT: jmp .LBB6_1
; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end
@@ -315,19 +319,21 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
;
; X86-NOCMOV-LABEL: atomic_fetch_max32:
; X86-NOCMOV: # %bb.0:
-; X86-NOCMOV-NEXT: subl $16, %esp
+; X86-NOCMOV-NEXT: pushl %esi
+; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl sc32, %eax
-; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB6_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %eax, %ecx
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx
+; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jg .LBB6_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1
@@ -335,33 +341,39 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB6_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1
-; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NOCMOV-NEXT: sete %cl
-; X86-NOCMOV-NEXT: testb $1, %cl
+; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, %eax
+; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
+; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
+; X86-NOCMOV-NEXT: sete %dl
+; X86-NOCMOV-NEXT: testb $1, %dl
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB6_2
; X86-NOCMOV-NEXT: jmp .LBB6_1
; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end
-; X86-NOCMOV-NEXT: addl $16, %esp
+; X86-NOCMOV-NEXT: addl $20, %esp
+; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: retl
;
; X86-NOX87-LABEL: atomic_fetch_max32:
; X86-NOX87: # %bb.0:
-; X86-NOX87-NEXT: subl $16, %esp
+; X86-NOX87-NEXT: pushl %esi
+; X86-NOX87-NEXT: subl $20, %esp
; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOX87-NEXT: movl sc32, %ecx
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOX87-NEXT: movl sc32, %eax
-; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: .LBB6_1: # %atomicrmw.start
; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOX87-NEXT: movl %eax, %ecx
+; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOX87-NEXT: subl %edx, %ecx
+; X86-NOX87-NEXT: movl %eax, %esi
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jg .LBB6_4
; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOX87-NEXT: # in Loop: Header=BB6_1 Depth=1
@@ -369,16 +381,20 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: .LBB6_4: # %atomicrmw.start
; X86-NOX87-NEXT: # in Loop: Header=BB6_1 Depth=1
-; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NOX87-NEXT: sete %cl
-; X86-NOX87-NEXT: testb $1, %cl
+; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NOX87-NEXT: movl %ecx, %eax
+; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
+; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
+; X86-NOX87-NEXT: sete %dl
+; X86-NOX87-NEXT: testb $1, %dl
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jne .LBB6_2
; X86-NOX87-NEXT: jmp .LBB6_1
; X86-NOX87-NEXT: .LBB6_2: # %atomicrmw.end
-; X86-NOX87-NEXT: addl $16, %esp
+; X86-NOX87-NEXT: addl $20, %esp
+; X86-NOX87-NEXT: popl %esi
; X86-NOX87-NEXT: retl
%t1 = atomicrmw max i32* @sc32, i32 %x acquire
ret void
@@ -387,21 +403,21 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
define void @atomic_fetch_min32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_min32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl sc32, %eax
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB7_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: subl %ecx, %edx
-; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; X64-NEXT: cmovlel %eax, %ecx
-; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
+; X64-NEXT: subl %edx, %ecx
+; X64-NEXT: cmovlel %eax, %edx
+; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB7_2
; X64-NEXT: jmp .LBB7_1
; X64-NEXT: .LBB7_2: # %atomicrmw.end
@@ -411,21 +427,21 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-CMOV-NEXT: movl sc32, %eax
-; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB7_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-CMOV-NEXT: movl %eax, %edx
-; X86-CMOV-NEXT: subl %ecx, %edx
-; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-CMOV-NEXT: cmovlel %eax, %ecx
-; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32
-; X86-CMOV-NEXT: sete %cl
-; X86-CMOV-NEXT: testb $1, %cl
+; X86-CMOV-NEXT: movl %eax, %ecx
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-CMOV-NEXT: subl %edx, %ecx
+; X86-CMOV-NEXT: cmovlel %eax, %edx
+; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
+; X86-CMOV-NEXT: sete %dl
+; X86-CMOV-NEXT: testb $1, %dl
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB7_2
; X86-CMOV-NEXT: jmp .LBB7_1
; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end
@@ -434,19 +450,21 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
;
; X86-NOCMOV-LABEL: atomic_fetch_min32:
; X86-NOCMOV: # %bb.0:
-; X86-NOCMOV-NEXT: subl $16, %esp
+; X86-NOCMOV-NEXT: pushl %esi
+; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl sc32, %eax
-; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB7_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %eax, %ecx
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx
+; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jle .LBB7_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1
@@ -454,33 +472,39 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB7_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1
-; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NOCMOV-NEXT: sete %cl
-; X86-NOCMOV-NEXT: testb $1, %cl
+; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, %eax
+; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
+; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
+; X86-NOCMOV-NEXT: sete %dl
+; X86-NOCMOV-NEXT: testb $1, %dl
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB7_2
; X86-NOCMOV-NEXT: jmp .LBB7_1
; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end
-; X86-NOCMOV-NEXT: addl $16, %esp
+; X86-NOCMOV-NEXT: addl $20, %esp
+; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: retl
;
; X86-NOX87-LABEL: atomic_fetch_min32:
; X86-NOX87: # %bb.0:
-; X86-NOX87-NEXT: subl $16, %esp
+; X86-NOX87-NEXT: pushl %esi
+; X86-NOX87-NEXT: subl $20, %esp
; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOX87-NEXT: movl sc32, %ecx
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOX87-NEXT: movl sc32, %eax
-; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: .LBB7_1: # %atomicrmw.start
; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOX87-NEXT: movl %eax, %ecx
+; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOX87-NEXT: subl %edx, %ecx
+; X86-NOX87-NEXT: movl %eax, %esi
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jle .LBB7_4
; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOX87-NEXT: # in Loop: Header=BB7_1 Depth=1
@@ -488,16 +512,20 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: .LBB7_4: # %atomicrmw.start
; X86-NOX87-NEXT: # in Loop: Header=BB7_1 Depth=1
-; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NOX87-NEXT: sete %cl
-; X86-NOX87-NEXT: testb $1, %cl
+; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NOX87-NEXT: movl %ecx, %eax
+; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
+; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
+; X86-NOX87-NEXT: sete %dl
+; X86-NOX87-NEXT: testb $1, %dl
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jne .LBB7_2
; X86-NOX87-NEXT: jmp .LBB7_1
; X86-NOX87-NEXT: .LBB7_2: # %atomicrmw.end
-; X86-NOX87-NEXT: addl $16, %esp
+; X86-NOX87-NEXT: addl $20, %esp
+; X86-NOX87-NEXT: popl %esi
; X86-NOX87-NEXT: retl
%t1 = atomicrmw min i32* @sc32, i32 %x acquire
ret void
@@ -506,21 +534,21 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
define void @atomic_fetch_umax32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_umax32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl sc32, %eax
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB8_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: subl %ecx, %edx
-; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; X64-NEXT: cmoval %eax, %ecx
-; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
+; X64-NEXT: subl %edx, %ecx
+; X64-NEXT: cmoval %eax, %edx
+; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB8_2
; X64-NEXT: jmp .LBB8_1
; X64-NEXT: .LBB8_2: # %atomicrmw.end
@@ -530,21 +558,21 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-CMOV-NEXT: movl sc32, %eax
-; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB8_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-CMOV-NEXT: movl %eax, %edx
-; X86-CMOV-NEXT: subl %ecx, %edx
-; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-CMOV-NEXT: cmoval %eax, %ecx
-; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32
-; X86-CMOV-NEXT: sete %cl
-; X86-CMOV-NEXT: testb $1, %cl
+; X86-CMOV-NEXT: movl %eax, %ecx
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-CMOV-NEXT: subl %edx, %ecx
+; X86-CMOV-NEXT: cmoval %eax, %edx
+; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
+; X86-CMOV-NEXT: sete %dl
+; X86-CMOV-NEXT: testb $1, %dl
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB8_2
; X86-CMOV-NEXT: jmp .LBB8_1
; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end
@@ -553,19 +581,21 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
;
; X86-NOCMOV-LABEL: atomic_fetch_umax32:
; X86-NOCMOV: # %bb.0:
-; X86-NOCMOV-NEXT: subl $16, %esp
+; X86-NOCMOV-NEXT: pushl %esi
+; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl sc32, %eax
-; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB8_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %eax, %ecx
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx
+; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: ja .LBB8_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1
@@ -573,33 +603,39 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB8_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1
-; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NOCMOV-NEXT: sete %cl
-; X86-NOCMOV-NEXT: testb $1, %cl
+; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, %eax
+; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
+; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
+; X86-NOCMOV-NEXT: sete %dl
+; X86-NOCMOV-NEXT: testb $1, %dl
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB8_2
; X86-NOCMOV-NEXT: jmp .LBB8_1
; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end
-; X86-NOCMOV-NEXT: addl $16, %esp
+; X86-NOCMOV-NEXT: addl $20, %esp
+; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: retl
;
; X86-NOX87-LABEL: atomic_fetch_umax32:
; X86-NOX87: # %bb.0:
-; X86-NOX87-NEXT: subl $16, %esp
+; X86-NOX87-NEXT: pushl %esi
+; X86-NOX87-NEXT: subl $20, %esp
; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOX87-NEXT: movl sc32, %ecx
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOX87-NEXT: movl sc32, %eax
-; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: .LBB8_1: # %atomicrmw.start
; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOX87-NEXT: movl %eax, %ecx
+; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOX87-NEXT: subl %edx, %ecx
+; X86-NOX87-NEXT: movl %eax, %esi
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: ja .LBB8_4
; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOX87-NEXT: # in Loop: Header=BB8_1 Depth=1
@@ -607,16 +643,20 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: .LBB8_4: # %atomicrmw.start
; X86-NOX87-NEXT: # in Loop: Header=BB8_1 Depth=1
-; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NOX87-NEXT: sete %cl
-; X86-NOX87-NEXT: testb $1, %cl
+; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NOX87-NEXT: movl %ecx, %eax
+; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
+; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
+; X86-NOX87-NEXT: sete %dl
+; X86-NOX87-NEXT: testb $1, %dl
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jne .LBB8_2
; X86-NOX87-NEXT: jmp .LBB8_1
; X86-NOX87-NEXT: .LBB8_2: # %atomicrmw.end
-; X86-NOX87-NEXT: addl $16, %esp
+; X86-NOX87-NEXT: addl $20, %esp
+; X86-NOX87-NEXT: popl %esi
; X86-NOX87-NEXT: retl
%t1 = atomicrmw umax i32* @sc32, i32 %x acquire
ret void
@@ -625,21 +665,21 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
define void @atomic_fetch_umin32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_umin32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl sc32, %eax
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB9_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: subl %ecx, %edx
-; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; X64-NEXT: cmovbel %eax, %ecx
-; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
+; X64-NEXT: subl %edx, %ecx
+; X64-NEXT: cmovbel %eax, %edx
+; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB9_2
; X64-NEXT: jmp .LBB9_1
; X64-NEXT: .LBB9_2: # %atomicrmw.end
@@ -649,21 +689,21 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-CMOV-NEXT: movl sc32, %eax
-; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB9_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-CMOV-NEXT: movl %eax, %edx
-; X86-CMOV-NEXT: subl %ecx, %edx
-; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-CMOV-NEXT: cmovbel %eax, %ecx
-; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32
-; X86-CMOV-NEXT: sete %cl
-; X86-CMOV-NEXT: testb $1, %cl
+; X86-CMOV-NEXT: movl %eax, %ecx
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-CMOV-NEXT: subl %edx, %ecx
+; X86-CMOV-NEXT: cmovbel %eax, %edx
+; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
+; X86-CMOV-NEXT: sete %dl
+; X86-CMOV-NEXT: testb $1, %dl
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB9_2
; X86-CMOV-NEXT: jmp .LBB9_1
; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end
@@ -672,19 +712,21 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
;
; X86-NOCMOV-LABEL: atomic_fetch_umin32:
; X86-NOCMOV: # %bb.0:
-; X86-NOCMOV-NEXT: subl $16, %esp
+; X86-NOCMOV-NEXT: pushl %esi
+; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl sc32, %eax
-; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB9_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %eax, %ecx
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx
+; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jbe .LBB9_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1
@@ -692,33 +734,39 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB9_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1
-; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NOCMOV-NEXT: sete %cl
-; X86-NOCMOV-NEXT: testb $1, %cl
+; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, %eax
+; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
+; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
+; X86-NOCMOV-NEXT: sete %dl
+; X86-NOCMOV-NEXT: testb $1, %dl
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB9_2
; X86-NOCMOV-NEXT: jmp .LBB9_1
; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end
-; X86-NOCMOV-NEXT: addl $16, %esp
+; X86-NOCMOV-NEXT: addl $20, %esp
+; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: retl
;
; X86-NOX87-LABEL: atomic_fetch_umin32:
; X86-NOX87: # %bb.0:
-; X86-NOX87-NEXT: subl $16, %esp
+; X86-NOX87-NEXT: pushl %esi
+; X86-NOX87-NEXT: subl $20, %esp
; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOX87-NEXT: movl sc32, %ecx
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOX87-NEXT: movl sc32, %eax
-; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: .LBB9_1: # %atomicrmw.start
; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOX87-NEXT: movl %eax, %ecx
+; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOX87-NEXT: subl %edx, %ecx
+; X86-NOX87-NEXT: movl %eax, %esi
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jbe .LBB9_4
; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOX87-NEXT: # in Loop: Header=BB9_1 Depth=1
@@ -726,16 +774,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: .LBB9_4: # %atomicrmw.start
; X86-NOX87-NEXT: # in Loop: Header=BB9_1 Depth=1
-; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NOX87-NEXT: sete %cl
-; X86-NOX87-NEXT: testb $1, %cl
+; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NOX87-NEXT: movl %ecx, %eax
+; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
+; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
+; X86-NOX87-NEXT: sete %dl
+; X86-NOX87-NEXT: testb $1, %dl
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jne .LBB9_2
; X86-NOX87-NEXT: jmp .LBB9_1
; X86-NOX87-NEXT: .LBB9_2: # %atomicrmw.end
-; X86-NOX87-NEXT: addl $16, %esp
+; X86-NOX87-NEXT: addl $20, %esp
+; X86-NOX87-NEXT: popl %esi
; X86-NOX87-NEXT: retl
%t1 = atomicrmw umin i32* @sc32, i32 %x acquire
ret void
diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll
index 452bcb254e0d..963561dc8deb 100644
--- a/llvm/test/CodeGen/X86/atomic64.ll
+++ b/llvm/test/CodeGen/X86/atomic64.ll
@@ -17,37 +17,46 @@ define void @atomic_fetch_add64() nounwind {
;
; I486-LABEL: atomic_fetch_add64:
; I486: # %bb.0: # %entry
-; I486-NEXT: subl $16, %esp
+; I486-NEXT: pushl %esi
+; I486-NEXT: subl $48, %esp
; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $1, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: movl %esp, %ecx
+; I486-NEXT: movl $2, 12(%ecx)
+; I486-NEXT: movl $0, 8(%ecx)
+; I486-NEXT: movl $1, 4(%ecx)
+; I486-NEXT: movl $sc64, (%ecx)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_add_8
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $3, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $0, 8(%esi)
+; I486-NEXT: movl $3, 4(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_add_8
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $5, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $0, 8(%esi)
+; I486-NEXT: movl $5, 4(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_add_8
-; I486-NEXT: movl %eax, %ecx
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 8(%eax)
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl %edx, 8(%esi)
+; I486-NEXT: movl %eax, 4(%esi)
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_add_8
-; I486-NEXT: addl $16, %esp
+; I486-NEXT: addl $48, %esp
+; I486-NEXT: popl %esi
; I486-NEXT: retl
entry:
%t1 = atomicrmw add i64* @sc64, i64 1 acquire
@@ -69,37 +78,46 @@ define void @atomic_fetch_sub64() nounwind {
;
; I486-LABEL: atomic_fetch_sub64:
; I486: # %bb.0:
-; I486-NEXT: subl $16, %esp
+; I486-NEXT: pushl %esi
+; I486-NEXT: subl $48, %esp
; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $1, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: movl %esp, %ecx
+; I486-NEXT: movl $2, 12(%ecx)
+; I486-NEXT: movl $0, 8(%ecx)
+; I486-NEXT: movl $1, 4(%ecx)
+; I486-NEXT: movl $sc64, (%ecx)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_sub_8
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $3, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $0, 8(%esi)
+; I486-NEXT: movl $3, 4(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_sub_8
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $5, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $0, 8(%esi)
+; I486-NEXT: movl $5, 4(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_sub_8
-; I486-NEXT: movl %eax, %ecx
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 8(%eax)
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl %edx, 8(%esi)
+; I486-NEXT: movl %eax, 4(%esi)
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_sub_8
-; I486-NEXT: addl $16, %esp
+; I486-NEXT: addl $48, %esp
+; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw sub i64* @sc64, i64 1 acquire
%t2 = atomicrmw sub i64* @sc64, i64 3 acquire
@@ -122,8 +140,9 @@ define void @atomic_fetch_and64() nounwind {
; X64-NEXT: # kill: def $rcx killed $ecx
; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
; X64-NEXT: sete %cl
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: testb $1, %cl
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB2_2
; X64-NEXT: jmp .LBB2_1
@@ -134,30 +153,36 @@ define void @atomic_fetch_and64() nounwind {
;
; I486-LABEL: atomic_fetch_and64:
; I486: # %bb.0:
-; I486-NEXT: subl $16, %esp
+; I486-NEXT: pushl %esi
+; I486-NEXT: subl $36, %esp
; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $3, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: movl %esp, %ecx
+; I486-NEXT: movl $2, 12(%ecx)
+; I486-NEXT: movl $0, 8(%ecx)
+; I486-NEXT: movl $3, 4(%ecx)
+; I486-NEXT: movl $sc64, (%ecx)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_and_8
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $5, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $0, 8(%esi)
+; I486-NEXT: movl $5, 4(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_and_8
-; I486-NEXT: movl %eax, %ecx
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 8(%eax)
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl %edx, 8(%esi)
+; I486-NEXT: movl %eax, 4(%esi)
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_and_8
-; I486-NEXT: addl $16, %esp
+; I486-NEXT: addl $36, %esp
+; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw and i64* @sc64, i64 3 acquire
%t2 = atomicrmw and i64* @sc64, i64 5 acquire
@@ -178,8 +203,9 @@ define void @atomic_fetch_or64() nounwind {
; X64-NEXT: orq $5, %rcx
; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
; X64-NEXT: sete %cl
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: testb $1, %cl
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB3_2
; X64-NEXT: jmp .LBB3_1
@@ -190,30 +216,36 @@ define void @atomic_fetch_or64() nounwind {
;
; I486-LABEL: atomic_fetch_or64:
; I486: # %bb.0:
-; I486-NEXT: subl $16, %esp
+; I486-NEXT: pushl %esi
+; I486-NEXT: subl $36, %esp
; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $3, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: movl %esp, %ecx
+; I486-NEXT: movl $2, 12(%ecx)
+; I486-NEXT: movl $0, 8(%ecx)
+; I486-NEXT: movl $3, 4(%ecx)
+; I486-NEXT: movl $sc64, (%ecx)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_or_8
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $5, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $0, 8(%esi)
+; I486-NEXT: movl $5, 4(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_or_8
-; I486-NEXT: movl %eax, %ecx
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 8(%eax)
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl %edx, 8(%esi)
+; I486-NEXT: movl %eax, 4(%esi)
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_or_8
-; I486-NEXT: addl $16, %esp
+; I486-NEXT: addl $36, %esp
+; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw or i64* @sc64, i64 3 acquire
%t2 = atomicrmw or i64* @sc64, i64 5 acquire
@@ -234,8 +266,9 @@ define void @atomic_fetch_xor64() nounwind {
; X64-NEXT: xorq $5, %rcx
; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
; X64-NEXT: sete %cl
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: testb $1, %cl
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB4_2
; X64-NEXT: jmp .LBB4_1
@@ -246,30 +279,36 @@ define void @atomic_fetch_xor64() nounwind {
;
; I486-LABEL: atomic_fetch_xor64:
; I486: # %bb.0:
-; I486-NEXT: subl $16, %esp
+; I486-NEXT: pushl %esi
+; I486-NEXT: subl $36, %esp
; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $3, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: movl %esp, %ecx
+; I486-NEXT: movl $2, 12(%ecx)
+; I486-NEXT: movl $0, 8(%ecx)
+; I486-NEXT: movl $3, 4(%ecx)
+; I486-NEXT: movl $sc64, (%ecx)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_xor_8
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $0, 8(%eax)
-; I486-NEXT: movl $5, 4(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $0, 8(%esi)
+; I486-NEXT: movl $5, 4(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_xor_8
-; I486-NEXT: movl %eax, %ecx
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 8(%eax)
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %ecx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl %edx, 8(%esi)
+; I486-NEXT: movl %eax, 4(%esi)
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_xor_8
-; I486-NEXT: addl $16, %esp
+; I486-NEXT: addl $36, %esp
+; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw xor i64* @sc64, i64 3 acquire
%t2 = atomicrmw xor i64* @sc64, i64 5 acquire
@@ -280,16 +319,15 @@ define void @atomic_fetch_xor64() nounwind {
define void @atomic_fetch_nand64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_nand64:
; X64: # %bb.0:
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq sc64, %rax
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: .LBB5_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: andq %rdx, %rcx
-; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: notq %rcx
; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
; X64-NEXT: sete %cl
@@ -302,17 +340,20 @@ define void @atomic_fetch_nand64(i64 %x) nounwind {
;
; I486-LABEL: atomic_fetch_nand64:
; I486: # %bb.0:
-; I486-NEXT: subl $16, %esp
-; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT: pushl %esi
+; I486-NEXT: subl $20, %esp
+; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 8(%eax)
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %edx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl %eax, 8(%esi)
+; I486-NEXT: movl %ecx, 4(%esi)
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_nand_8
-; I486-NEXT: addl $16, %esp
+; I486-NEXT: addl $20, %esp
+; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw nand i64* @sc64, i64 %x acquire
ret void
@@ -321,21 +362,21 @@ define void @atomic_fetch_nand64(i64 %x) nounwind {
define void @atomic_fetch_max64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_max64:
; X64: # %bb.0:
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq sc64, %rax
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: .LBB6_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: subq %rcx, %rdx
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: cmovgq %rax, %rcx
-; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: subq %rdx, %rcx
+; X64-NEXT: cmovgq %rax, %rdx
+; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB6_2
; X64-NEXT: jmp .LBB6_1
; X64-NEXT: .LBB6_2: # %atomicrmw.end
@@ -345,65 +386,70 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
; I486: # %bb.0:
; I486-NEXT: pushl %ebp
; I486-NEXT: movl %esp, %ebp
+; I486-NEXT: pushl %ebx
+; I486-NEXT: pushl %edi
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
+; I486-NEXT: movl 8(%ebp), %ecx
+; I486-NEXT: movl sc64+4, %edx
+; I486-NEXT: movl sc64, %esi
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl 8(%ebp), %eax
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl sc64+4, %eax
-; I486-NEXT: movl sc64, %ecx
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jmp .LBB6_1
; I486-NEXT: .LBB6_1: # %atomicrmw.start
; I486-NEXT: # =>This Inner Loop Header: Depth=1
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT: subl %ecx, %edx
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: sbbl %eax, %esi
+; I486-NEXT: movl %ecx, %edi
+; I486-NEXT: movl %eax, %ebx
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: subl %ecx, %esi
-; I486-NEXT: sbbl %eax, %edx
-; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jl .LBB6_4
; I486-NEXT: # %bb.3: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB6_1 Depth=1
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB6_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB6_1 Depth=1
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 12(%eax)
-; I486-NEXT: movl %ecx, 8(%eax)
-; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 20(%eax)
-; I486-NEXT: movl $2, 16(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: movl %esp, %edi
+; I486-NEXT: movl %eax, 12(%edi)
+; I486-NEXT: movl %ecx, 8(%edi)
+; I486-NEXT: leal {{[0-9]+}}(%esp), %eax
+; I486-NEXT: movl %eax, 4(%edi)
+; I486-NEXT: movl $2, 20(%edi)
+; I486-NEXT: movl $2, 16(%edi)
+; I486-NEXT: movl $sc64, (%edi)
; I486-NEXT: calll __atomic_compare_exchange_8
-; I486-NEXT: movb %al, %dl
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
-; I486-NEXT: testb %dl, %dl
+; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT: testb %al, %al
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: je .LBB6_1
; I486-NEXT: jmp .LBB6_2
; I486-NEXT: .LBB6_2: # %atomicrmw.end
-; I486-NEXT: leal -4(%ebp), %esp
+; I486-NEXT: leal -12(%ebp), %esp
; I486-NEXT: popl %esi
+; I486-NEXT: popl %edi
+; I486-NEXT: popl %ebx
; I486-NEXT: popl %ebp
; I486-NEXT: retl
%t1 = atomicrmw max i64* @sc64, i64 %x acquire
@@ -414,21 +460,21 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
define void @atomic_fetch_min64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_min64:
; X64: # %bb.0:
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq sc64, %rax
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: .LBB7_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: subq %rcx, %rdx
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: cmovleq %rax, %rcx
-; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: subq %rdx, %rcx
+; X64-NEXT: cmovleq %rax, %rdx
+; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB7_2
; X64-NEXT: jmp .LBB7_1
; X64-NEXT: .LBB7_2: # %atomicrmw.end
@@ -438,65 +484,70 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
; I486: # %bb.0:
; I486-NEXT: pushl %ebp
; I486-NEXT: movl %esp, %ebp
+; I486-NEXT: pushl %ebx
+; I486-NEXT: pushl %edi
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
+; I486-NEXT: movl 8(%ebp), %ecx
+; I486-NEXT: movl sc64+4, %edx
+; I486-NEXT: movl sc64, %esi
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl 8(%ebp), %eax
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl sc64+4, %eax
-; I486-NEXT: movl sc64, %ecx
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jmp .LBB7_1
; I486-NEXT: .LBB7_1: # %atomicrmw.start
; I486-NEXT: # =>This Inner Loop Header: Depth=1
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT: subl %ecx, %edx
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: sbbl %eax, %esi
+; I486-NEXT: movl %ecx, %edi
+; I486-NEXT: movl %eax, %ebx
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: subl %ecx, %esi
-; I486-NEXT: sbbl %eax, %edx
-; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jge .LBB7_4
; I486-NEXT: # %bb.3: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB7_1 Depth=1
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB7_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB7_1 Depth=1
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 12(%eax)
-; I486-NEXT: movl %ecx, 8(%eax)
-; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 20(%eax)
-; I486-NEXT: movl $2, 16(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: movl %esp, %edi
+; I486-NEXT: movl %eax, 12(%edi)
+; I486-NEXT: movl %ecx, 8(%edi)
+; I486-NEXT: leal {{[0-9]+}}(%esp), %eax
+; I486-NEXT: movl %eax, 4(%edi)
+; I486-NEXT: movl $2, 20(%edi)
+; I486-NEXT: movl $2, 16(%edi)
+; I486-NEXT: movl $sc64, (%edi)
; I486-NEXT: calll __atomic_compare_exchange_8
-; I486-NEXT: movb %al, %dl
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
-; I486-NEXT: testb %dl, %dl
+; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT: testb %al, %al
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: je .LBB7_1
; I486-NEXT: jmp .LBB7_2
; I486-NEXT: .LBB7_2: # %atomicrmw.end
-; I486-NEXT: leal -4(%ebp), %esp
+; I486-NEXT: leal -12(%ebp), %esp
; I486-NEXT: popl %esi
+; I486-NEXT: popl %edi
+; I486-NEXT: popl %ebx
; I486-NEXT: popl %ebp
; I486-NEXT: retl
%t1 = atomicrmw min i64* @sc64, i64 %x acquire
@@ -507,21 +558,21 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
define void @atomic_fetch_umax64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_umax64:
; X64: # %bb.0:
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq sc64, %rax
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: .LBB8_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: subq %rcx, %rdx
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: cmovaq %rax, %rcx
-; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: subq %rdx, %rcx
+; X64-NEXT: cmovaq %rax, %rdx
+; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB8_2
; X64-NEXT: jmp .LBB8_1
; X64-NEXT: .LBB8_2: # %atomicrmw.end
@@ -531,65 +582,70 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
; I486: # %bb.0:
; I486-NEXT: pushl %ebp
; I486-NEXT: movl %esp, %ebp
+; I486-NEXT: pushl %ebx
+; I486-NEXT: pushl %edi
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
+; I486-NEXT: movl 8(%ebp), %ecx
+; I486-NEXT: movl sc64+4, %edx
+; I486-NEXT: movl sc64, %esi
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl 8(%ebp), %eax
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl sc64+4, %eax
-; I486-NEXT: movl sc64, %ecx
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jmp .LBB8_1
; I486-NEXT: .LBB8_1: # %atomicrmw.start
; I486-NEXT: # =>This Inner Loop Header: Depth=1
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT: subl %ecx, %edx
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: sbbl %eax, %esi
+; I486-NEXT: movl %ecx, %edi
+; I486-NEXT: movl %eax, %ebx
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: subl %ecx, %esi
-; I486-NEXT: sbbl %eax, %edx
-; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jb .LBB8_4
; I486-NEXT: # %bb.3: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB8_1 Depth=1
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB8_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB8_1 Depth=1
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 12(%eax)
-; I486-NEXT: movl %ecx, 8(%eax)
-; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 20(%eax)
-; I486-NEXT: movl $2, 16(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: movl %esp, %edi
+; I486-NEXT: movl %eax, 12(%edi)
+; I486-NEXT: movl %ecx, 8(%edi)
+; I486-NEXT: leal {{[0-9]+}}(%esp), %eax
+; I486-NEXT: movl %eax, 4(%edi)
+; I486-NEXT: movl $2, 20(%edi)
+; I486-NEXT: movl $2, 16(%edi)
+; I486-NEXT: movl $sc64, (%edi)
; I486-NEXT: calll __atomic_compare_exchange_8
-; I486-NEXT: movb %al, %dl
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
-; I486-NEXT: testb %dl, %dl
+; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT: testb %al, %al
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: je .LBB8_1
; I486-NEXT: jmp .LBB8_2
; I486-NEXT: .LBB8_2: # %atomicrmw.end
-; I486-NEXT: leal -4(%ebp), %esp
+; I486-NEXT: leal -12(%ebp), %esp
; I486-NEXT: popl %esi
+; I486-NEXT: popl %edi
+; I486-NEXT: popl %ebx
; I486-NEXT: popl %ebp
; I486-NEXT: retl
%t1 = atomicrmw umax i64* @sc64, i64 %x acquire
@@ -600,21 +656,21 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
define void @atomic_fetch_umin64(i64 %x) nounwind {
; X64-LABEL: atomic_fetch_umin64:
; X64: # %bb.0:
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq sc64, %rax
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: .LBB9_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: subq %rcx, %rdx
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: cmovbeq %rax, %rcx
-; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: subq %rdx, %rcx
+; X64-NEXT: cmovbeq %rax, %rdx
+; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB9_2
; X64-NEXT: jmp .LBB9_1
; X64-NEXT: .LBB9_2: # %atomicrmw.end
@@ -624,65 +680,70 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
; I486: # %bb.0:
; I486-NEXT: pushl %ebp
; I486-NEXT: movl %esp, %ebp
+; I486-NEXT: pushl %ebx
+; I486-NEXT: pushl %edi
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
+; I486-NEXT: movl 8(%ebp), %ecx
+; I486-NEXT: movl sc64+4, %edx
+; I486-NEXT: movl sc64, %esi
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl 8(%ebp), %eax
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl sc64+4, %eax
-; I486-NEXT: movl sc64, %ecx
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jmp .LBB9_1
; I486-NEXT: .LBB9_1: # %atomicrmw.start
; I486-NEXT: # =>This Inner Loop Header: Depth=1
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT: subl %ecx, %edx
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: sbbl %eax, %esi
+; I486-NEXT: movl %ecx, %edi
+; I486-NEXT: movl %eax, %ebx
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: subl %ecx, %esi
-; I486-NEXT: sbbl %eax, %edx
-; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jae .LBB9_4
; I486-NEXT: # %bb.3: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB9_1 Depth=1
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB9_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB9_1 Depth=1
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 12(%eax)
-; I486-NEXT: movl %ecx, 8(%eax)
-; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 20(%eax)
-; I486-NEXT: movl $2, 16(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: movl %esp, %edi
+; I486-NEXT: movl %eax, 12(%edi)
+; I486-NEXT: movl %ecx, 8(%edi)
+; I486-NEXT: leal {{[0-9]+}}(%esp), %eax
+; I486-NEXT: movl %eax, 4(%edi)
+; I486-NEXT: movl $2, 20(%edi)
+; I486-NEXT: movl $2, 16(%edi)
+; I486-NEXT: movl $sc64, (%edi)
; I486-NEXT: calll __atomic_compare_exchange_8
-; I486-NEXT: movb %al, %dl
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
-; I486-NEXT: testb %dl, %dl
+; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT: testb %al, %al
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: je .LBB9_1
; I486-NEXT: jmp .LBB9_2
; I486-NEXT: .LBB9_2: # %atomicrmw.end
-; I486-NEXT: leal -4(%ebp), %esp
+; I486-NEXT: leal -12(%ebp), %esp
; I486-NEXT: popl %esi
+; I486-NEXT: popl %edi
+; I486-NEXT: popl %ebx
; I486-NEXT: popl %ebp
; I486-NEXT: retl
%t1 = atomicrmw umin i64* @sc64, i64 %x acquire
@@ -704,18 +765,19 @@ define void @atomic_fetch_cmpxchg64() nounwind {
; I486-NEXT: pushl %ebp
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $32, %esp
+; I486-NEXT: subl $40, %esp
; I486-NEXT: leal sc64, %eax
; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 20(%eax)
-; I486-NEXT: movl $2, 16(%eax)
-; I486-NEXT: movl $0, 12(%eax)
-; I486-NEXT: movl $1, 8(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: movl %esp, %edx
+; I486-NEXT: movl %ecx, 4(%edx)
+; I486-NEXT: movl $2, 20(%edx)
+; I486-NEXT: movl $2, 16(%edx)
+; I486-NEXT: movl $0, 12(%edx)
+; I486-NEXT: movl $1, 8(%edx)
+; I486-NEXT: movl $sc64, (%edx)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_compare_exchange_8
; I486-NEXT: movl %ebp, %esp
; I486-NEXT: popl %ebp
@@ -732,17 +794,20 @@ define void @atomic_fetch_store64(i64 %x) nounwind {
;
; I486-LABEL: atomic_fetch_store64:
; I486: # %bb.0:
-; I486-NEXT: subl $16, %esp
-; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT: pushl %esi
+; I486-NEXT: subl $20, %esp
+; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 8(%eax)
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $3, 12(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %edx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl %eax, 8(%esi)
+; I486-NEXT: movl %ecx, 4(%esi)
+; I486-NEXT: movl $3, 12(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_store_8
-; I486-NEXT: addl $16, %esp
+; I486-NEXT: addl $20, %esp
+; I486-NEXT: popl %esi
; I486-NEXT: retl
store atomic i64 %x, i64* @sc64 release, align 8
ret void
@@ -756,17 +821,20 @@ define void @atomic_fetch_swap64(i64 %x) nounwind {
;
; I486-LABEL: atomic_fetch_swap64:
; I486: # %bb.0:
-; I486-NEXT: subl $16, %esp
-; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT: pushl %esi
+; I486-NEXT: subl $20, %esp
+; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: leal sc64, %eax
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 8(%eax)
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $sc64, (%eax)
+; I486-NEXT: leal sc64, %edx
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl %eax, 8(%esi)
+; I486-NEXT: movl %ecx, 4(%esi)
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $sc64, (%esi)
+; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_exchange_8
-; I486-NEXT: addl $16, %esp
+; I486-NEXT: addl $20, %esp
+; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
ret void
@@ -783,20 +851,23 @@ define void @atomic_fetch_swapf64(double %x) nounwind {
; I486: # %bb.0:
; I486-NEXT: pushl %ebp
; I486-NEXT: movl %esp, %ebp
+; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $24, %esp
+; I486-NEXT: subl $40, %esp
; I486-NEXT: fldl 8(%ebp)
; I486-NEXT: leal fsc64, %eax
; I486-NEXT: fstpl {{[0-9]+}}(%esp)
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 8(%eax)
-; I486-NEXT: movl %ecx, 4(%eax)
-; I486-NEXT: movl $2, 12(%eax)
-; I486-NEXT: movl $fsc64, (%eax)
+; I486-NEXT: movl %esp, %esi
+; I486-NEXT: movl %edx, 8(%esi)
+; I486-NEXT: movl %ecx, 4(%esi)
+; I486-NEXT: movl $2, 12(%esi)
+; I486-NEXT: movl $fsc64, (%esi)
+; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_exchange_8
-; I486-NEXT: movl %ebp, %esp
+; I486-NEXT: leal -4(%ebp), %esp
+; I486-NEXT: popl %esi
; I486-NEXT: popl %ebp
; I486-NEXT: retl
%t1 = atomicrmw xchg double* @fsc64, double %x acquire
diff --git a/llvm/test/CodeGen/X86/atomic6432.ll b/llvm/test/CodeGen/X86/atomic6432.ll
index b83d7ba09ac3..31cc79536824 100644
--- a/llvm/test/CodeGen/X86/atomic6432.ll
+++ b/llvm/test/CodeGen/X86/atomic6432.ll
@@ -7,98 +7,106 @@ define void @atomic_fetch_add64() nounwind {
; X32-LABEL: atomic_fetch_add64:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $72, %esp
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: subl $56, %esp
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB0_1
; X32-NEXT: .LBB0_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl $1, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl $1, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB0_1
; X32-NEXT: jmp .LBB0_2
; X32-NEXT: .LBB0_2: # %atomicrmw.end
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB0_3
; X32-NEXT: .LBB0_3: # %atomicrmw.start2
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl $3, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl $3, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB0_3
; X32-NEXT: jmp .LBB0_4
; X32-NEXT: .LBB0_4: # %atomicrmw.end1
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB0_5
; X32-NEXT: .LBB0_5: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl $5, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl $5, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB0_5
; X32-NEXT: jmp .LBB0_6
; X32-NEXT: .LBB0_6: # %atomicrmw.end7
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB0_7
; X32-NEXT: .LBB0_7: # %atomicrmw.start14
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB0_7
; X32-NEXT: jmp .LBB0_8
; X32-NEXT: .LBB0_8: # %atomicrmw.end13
-; X32-NEXT: addl $72, %esp
+; X32-NEXT: addl $56, %esp
; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
entry:
@@ -113,98 +121,106 @@ define void @atomic_fetch_sub64() nounwind {
; X32-LABEL: atomic_fetch_sub64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $72, %esp
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: subl $56, %esp
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB1_1
; X32-NEXT: .LBB1_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl $-1, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: adcl $-1, %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl $-1, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $-1, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB1_1
; X32-NEXT: jmp .LBB1_2
; X32-NEXT: .LBB1_2: # %atomicrmw.end
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB1_3
; X32-NEXT: .LBB1_3: # %atomicrmw.start2
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl $-3, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: adcl $-1, %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl $-3, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $-1, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB1_3
; X32-NEXT: jmp .LBB1_4
; X32-NEXT: .LBB1_4: # %atomicrmw.end1
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB1_5
; X32-NEXT: .LBB1_5: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl $-5, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: adcl $-1, %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl $-5, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $-1, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB1_5
; X32-NEXT: jmp .LBB1_6
; X32-NEXT: .LBB1_6: # %atomicrmw.end7
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB1_7
; X32-NEXT: .LBB1_7: # %atomicrmw.start14
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: subl %ecx, %ebx
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: sbbl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: subl %esi, %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: sbbl %ebx, %edi
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB1_7
; X32-NEXT: jmp .LBB1_8
; X32-NEXT: .LBB1_8: # %atomicrmw.end13
-; X32-NEXT: addl $72, %esp
+; X32-NEXT: addl $56, %esp
; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
%t1 = atomicrmw sub i64* @sc64, i64 1 acquire
@@ -218,75 +234,83 @@ define void @atomic_fetch_and64() nounwind {
; X32-LABEL: atomic_fetch_and64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $52, %esp
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: subl $44, %esp
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB2_1
; X32-NEXT: .LBB2_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: andl $3, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: andl $3, %ecx
+; X32-NEXT: xorl %esi, %esi
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB2_1
; X32-NEXT: jmp .LBB2_2
; X32-NEXT: .LBB2_2: # %atomicrmw.end
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB2_3
; X32-NEXT: .LBB2_3: # %atomicrmw.start2
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: andl $1, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: andl $1, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: andl $1, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB2_3
; X32-NEXT: jmp .LBB2_4
; X32-NEXT: .LBB2_4: # %atomicrmw.end1
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB2_5
; X32-NEXT: .LBB2_5: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: andl %ecx, %ebx
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
; X32-NEXT: andl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: andl %ebx, %edi
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB2_5
; X32-NEXT: jmp .LBB2_6
; X32-NEXT: .LBB2_6: # %atomicrmw.end7
-; X32-NEXT: addl $52, %esp
+; X32-NEXT: addl $44, %esp
; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
%t1 = atomicrmw and i64* @sc64, i64 3 acquire
@@ -299,75 +323,84 @@ define void @atomic_fetch_or64() nounwind {
; X32-LABEL: atomic_fetch_or64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $52, %esp
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: subl $48, %esp
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB3_1
; X32-NEXT: .LBB3_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: orl $3, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: orl $3, %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB3_1
; X32-NEXT: jmp .LBB3_2
; X32-NEXT: .LBB3_2: # %atomicrmw.end
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB3_3
; X32-NEXT: .LBB3_3: # %atomicrmw.start2
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: orl $1, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: orl $1, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: orl $1, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB3_3
; X32-NEXT: jmp .LBB3_4
; X32-NEXT: .LBB3_4: # %atomicrmw.end1
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB3_5
; X32-NEXT: .LBB3_5: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: orl %ecx, %ebx
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
; X32-NEXT: orl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: orl %ebx, %edi
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB3_5
; X32-NEXT: jmp .LBB3_6
; X32-NEXT: .LBB3_6: # %atomicrmw.end7
-; X32-NEXT: addl $52, %esp
+; X32-NEXT: addl $48, %esp
; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
%t1 = atomicrmw or i64* @sc64, i64 3 acquire
@@ -380,75 +413,84 @@ define void @atomic_fetch_xor64() nounwind {
; X32-LABEL: atomic_fetch_xor64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $52, %esp
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: subl $48, %esp
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB4_1
; X32-NEXT: .LBB4_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: xorl $3, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: xorl $3, %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB4_1
; X32-NEXT: jmp .LBB4_2
; X32-NEXT: .LBB4_2: # %atomicrmw.end
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB4_3
; X32-NEXT: .LBB4_3: # %atomicrmw.start2
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: xorl $1, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: xorl $1, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: xorl $1, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB4_3
; X32-NEXT: jmp .LBB4_4
; X32-NEXT: .LBB4_4: # %atomicrmw.end1
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB4_5
; X32-NEXT: .LBB4_5: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: xorl %ecx, %ebx
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
; X32-NEXT: xorl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: xorl %ebx, %edi
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB4_5
; X32-NEXT: jmp .LBB4_6
; X32-NEXT: .LBB4_6: # %atomicrmw.end7
-; X32-NEXT: addl $52, %esp
+; X32-NEXT: addl $48, %esp
; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
%t1 = atomicrmw xor i64* @sc64, i64 3 acquire
@@ -463,39 +505,36 @@ define void @atomic_fetch_nand64(i64 %x) nounwind {
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $32, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: subl $16, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: jmp .LBB5_1
; X32-NEXT: .LBB5_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: movl (%esp), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: andl %edi, %ecx
-; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: andl %esi, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: notl %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: andl %esi, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: andl %ebx, %edi
+; X32-NEXT: notl %edi
; X32-NEXT: notl %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %ebx
; X32-NEXT: lock cmpxchg8b sc64
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB5_1
; X32-NEXT: jmp .LBB5_2
; X32-NEXT: .LBB5_2: # %atomicrmw.end
-; X32-NEXT: addl $32, %esp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -508,41 +547,42 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
; X32-LABEL: atomic_fetch_max64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $32, %esp
+; X32-NEXT: subl $24, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB6_1
; X32-NEXT: .LBB6_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: subl %eax, %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: sbbl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: cmovll %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: cmovll %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: cmovll %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %ecx
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: jne .LBB6_1
; X32-NEXT: jmp .LBB6_2
; X32-NEXT: .LBB6_2: # %atomicrmw.end
-; X32-NEXT: addl $32, %esp
+; X32-NEXT: addl $24, %esp
; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
%t1 = atomicrmw max i64* @sc64, i64 %x acquire
@@ -553,41 +593,42 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
; X32-LABEL: atomic_fetch_min64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $32, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: subl $24, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB7_1
; X32-NEXT: .LBB7_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: subl %eax, %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: sbbl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: cmovgel %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: cmovgel %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: cmovgel %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %ecx
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: jne .LBB7_1
; X32-NEXT: jmp .LBB7_2
; X32-NEXT: .LBB7_2: # %atomicrmw.end
-; X32-NEXT: addl $32, %esp
+; X32-NEXT: addl $24, %esp
; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
%t1 = atomicrmw min i64* @sc64, i64 %x acquire
@@ -598,41 +639,42 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
; X32-LABEL: atomic_fetch_umax64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $32, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: subl $24, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB8_1
; X32-NEXT: .LBB8_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: subl %eax, %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: sbbl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: cmovbl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: cmovbl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: cmovbl %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %ecx
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: jne .LBB8_1
; X32-NEXT: jmp .LBB8_2
; X32-NEXT: .LBB8_2: # %atomicrmw.end
-; X32-NEXT: addl $32, %esp
+; X32-NEXT: addl $24, %esp
; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
%t1 = atomicrmw umax i64* @sc64, i64 %x acquire
@@ -643,41 +685,42 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
; X32-LABEL: atomic_fetch_umin64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $32, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: subl $24, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
+; X32-NEXT: movl sc64, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB9_1
; X32-NEXT: .LBB9_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: subl %eax, %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: sbbl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: cmovael %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: cmovael %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: cmovael %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %ecx
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: jne .LBB9_1
; X32-NEXT: jmp .LBB9_2
; X32-NEXT: .LBB9_2: # %atomicrmw.end
-; X32-NEXT: addl $32, %esp
+; X32-NEXT: addl $24, %esp
; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
%t1 = atomicrmw umin i64* @sc64, i64 %x acquire
@@ -688,11 +731,14 @@ define void @atomic_fetch_cmpxchg64() nounwind {
; X32-LABEL: atomic_fetch_cmpxchg64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
-; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: pushl %eax
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: movl $1, %ebx
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
+; X32-NEXT: addl $4, %esp
; X32-NEXT: popl %ebx
; X32-NEXT: retl
%t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
@@ -717,24 +763,24 @@ define void @atomic_fetch_swap64(i64 %x) nounwind {
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: subl $16, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl sc64+4, %edx
-; X32-NEXT: movl sc64, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: movl sc64+4, %eax
+; X32-NEXT: movl sc64, %edx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: jmp .LBB12_1
; X32-NEXT: .LBB12_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: movl (%esp), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB12_1
; X32-NEXT: jmp .LBB12_2
; X32-NEXT: .LBB12_2: # %atomicrmw.end
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index 7bd255c13025..f448bfec2ec9 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -34,27 +34,27 @@ define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>*
;
; CHECK_O0-LABEL: test_256_load:
; CHECK_O0: # %bb.0: # %entry
-; CHECK_O0-NEXT: subq $184, %rsp
-; CHECK_O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK_O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK_O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK_O0-NEXT: subq $152, %rsp
; CHECK_O0-NEXT: vmovapd (%rdi), %ymm0
-; CHECK_O0-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK_O0-NEXT: vmovaps (%rsi), %ymm1
-; CHECK_O0-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK_O0-NEXT: vmovdqa (%rdx), %ymm2
+; CHECK_O0-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK_O0-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK_O0-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK_O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK_O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK_O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK_O0-NEXT: callq dummy
-; CHECK_O0-NEXT: vmovups (%rsp), %ymm2 # 32-byte Reload
-; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
-; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK_O0-NEXT: vmovapd %ymm0, (%rax)
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; CHECK_O0-NEXT: vmovaps %ymm1, (%rcx)
; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; CHECK_O0-NEXT: vmovapd %ymm2, (%rdi)
-; CHECK_O0-NEXT: vmovaps %ymm1, (%rsi)
-; CHECK_O0-NEXT: vmovdqa %ymm0, (%rdx)
-; CHECK_O0-NEXT: addq $184, %rsp
+; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
+; CHECK_O0-NEXT: vmovdqa %ymm2, (%rdx)
+; CHECK_O0-NEXT: addq $152, %rsp
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
entry:
@@ -173,10 +173,9 @@ define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp
;
; CHECK_O0-LABEL: double_save:
; CHECK_O0: # %bb.0:
+; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: # implicit-def: $ymm0
-; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
@@ -196,10 +195,9 @@ define void @double_save_volatile(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nou
;
; CHECK_O0-LABEL: double_save_volatile:
; CHECK_O0: # %bb.0:
+; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: # implicit-def: $ymm0
-; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
@@ -274,11 +272,11 @@ define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
;
; CHECK_O0-LABEL: add8i32:
; CHECK_O0: # %bb.0:
-; CHECK_O0-NEXT: vmovdqu (%rsi), %xmm2
+; CHECK_O0-NEXT: vmovdqu (%rsi), %xmm0
; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1
-; CHECK_O0-NEXT: # implicit-def: $ymm0
-; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK_O0-NEXT: # implicit-def: $ymm2
+; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
@@ -319,11 +317,11 @@ define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
;
; CHECK_O0-LABEL: add4i64a16:
; CHECK_O0: # %bb.0:
-; CHECK_O0-NEXT: vmovdqa (%rsi), %xmm2
+; CHECK_O0-NEXT: vmovdqa (%rsi), %xmm0
; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1
-; CHECK_O0-NEXT: # implicit-def: $ymm0
-; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK_O0-NEXT: # implicit-def: $ymm2
+; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
index 1bc5e104512e..186370ca675c 100755
--- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
@@ -19,41 +19,44 @@ define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %f
; CHECK: ## %bb.0:
; CHECK-NEXT: subq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: vpmovw2m %xmm0, %k0
+; CHECK-NEXT: movl $2, %esi
+; CHECK-NEXT: movl $8, %eax
; CHECK-NEXT: movq %rdx, %rdi
-; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
-; CHECK-NEXT: vpmovw2m %xmm0, %k0
; CHECK-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; CHECK-NEXT: movl $2, %esi
-; CHECK-NEXT: movl $8, %edx
; CHECK-NEXT: callq _calc_expected_mask_val
-; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; CHECK-NEXT: ## kill: def $eax killed $eax killed $rax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movzwl %ax, %esi
+; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; CHECK-NEXT: kmovb %k0, %edi
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; CHECK-NEXT: callq _check_mask16
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
; CHECK-NEXT: vpmovd2m %xmm0, %k0
-; CHECK-NEXT: ## kill: def $k1 killed $k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: ## kill: def $al killed $al killed $eax
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
-; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-NEXT: kmovq %k0, %k1
+; CHECK-NEXT: kmovd %k0, %ecx
+; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
; CHECK-NEXT: movl $4, %edx
; CHECK-NEXT: movl %edx, %esi
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK-NEXT: callq _calc_expected_mask_val
-; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %si ## 2-byte Reload
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax
-; CHECK-NEXT: movzwl %si, %edi
+; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx ## 2-byte Reload
+; CHECK-NEXT: movzwl %cx, %edi
; CHECK-NEXT: movzwl %ax, %esi
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; CHECK-NEXT: callq _check_mask16
; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/bug47278-eflags-error.mir b/llvm/test/CodeGen/X86/bug47278-eflags-error.mir
deleted file mode 100644
index e4e68451850b..000000000000
--- a/llvm/test/CodeGen/X86/bug47278-eflags-error.mir
+++ /dev/null
@@ -1,78 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=i386-unknown-linux-musl -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s
-
-# Test for correct management of allocatable and non-allocatable
-# live-ins in fastregalloc
-
----
-name: live_through_ecx
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: live_through_ecx
- ; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $ecx
- ; CHECK: NOOP implicit $ecx
- ; CHECK: bb.1:
- ; CHECK: liveins: $ecx
- ; CHECK: RET implicit killed $ecx
- bb.0:
- liveins: $ecx
- NOOP implicit $ecx
-
- bb.1:
- liveins: $ecx
-
- RET implicit $ecx
-
-...
-
----
-name: live_out_ecx
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: live_out_ecx
- ; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $eax, $ebx
- ; CHECK: renamable $ecx = COPY killed $ebx
- ; CHECK: bb.1:
- ; CHECK: liveins: $ecx
- ; CHECK: RET implicit killed $ecx
- bb.0:
- liveins: $eax, $ebx
- %0:gr32 = COPY $eax
- %1:gr32 = COPY $ebx
- $ecx = COPY %1
-
- bb.1:
- liveins: $ecx
-
- RET implicit $ecx
-
-...
-
----
-name: live_out_eflags
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: live_out_eflags
- ; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $eax, $ebx
- ; CHECK: TEST32rr killed renamable $eax, killed renamable $ebx, implicit-def $eflags
- ; CHECK: bb.1:
- ; CHECK: liveins: $eflags
- ; CHECK: RET implicit killed $eflags
- bb.0:
- liveins: $eax, $ebx
- %0:gr32 = COPY $eax
- %1:gr32 = COPY $ebx
- TEST32rr %0, %1, implicit-def $eflags
-
- bb.1:
- liveins: $eflags
-
- RET implicit $eflags
-
-...
diff --git a/llvm/test/CodeGen/X86/bug47278.mir b/llvm/test/CodeGen/X86/bug47278.mir
deleted file mode 100644
index d2ac8f19a85e..000000000000
--- a/llvm/test/CodeGen/X86/bug47278.mir
+++ /dev/null
@@ -1,45 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=i386-unknown-linux-musl -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s
-
-# Make sure this case doesn't assert or try to assign $ecx to %1 on
-# SHRD32rrCL
-
----
-name: foo
-tracksRegLiveness: true
-body: |
- bb.0:
- ; CHECK-LABEL: name: foo
- ; CHECK: renamable $eax = IMPLICIT_DEF
- ; CHECK: renamable $edx = MOVZX32rm8 renamable $eax, 1, $noreg, 0, $noreg :: (load 1 from `i168* undef` + 20, align 16)
- ; CHECK: dead renamable $ecx = MOV32rm renamable $eax, 1, $noreg, 0, $noreg :: (load 4 from `i168* undef` + 12, align 16)
- ; CHECK: renamable $al = MOV8rm killed renamable $eax, 1, $noreg, 0, $noreg :: (load 1 from `i32* undef`, align 4)
- ; CHECK: dead renamable $ecx = COPY renamable $edx
- ; CHECK: dead renamable $ecx = COPY renamable $edx
- ; CHECK: dead renamable $ecx = COPY renamable $edx
- ; CHECK: renamable $esi = IMPLICIT_DEF
- ; CHECK: renamable $ecx = IMPLICIT_DEF
- ; CHECK: renamable $ecx = CMOV32rr renamable $ecx, killed renamable $esi, 2, implicit undef $eflags
- ; CHECK: renamable $cl = MOV8ri -128
- ; CHECK: $cl = IMPLICIT_DEF
- ; CHECK: renamable $eax = COPY renamable $edx
- ; CHECK: dead renamable $eax = SHRD32rrCL renamable $eax, killed renamable $edx, implicit-def dead $eflags, implicit killed $cl
- ; CHECK: RETL
- %0:gr32 = IMPLICIT_DEF
- %1:gr32 = MOVZX32rm8 %0, 1, $noreg, 0, $noreg :: (load 1 from `i168* undef` + 20, align 16)
- %2:gr32 = MOV32rm %0, 1, $noreg, 0, $noreg :: (load 4 from `i168* undef` + 12, align 16)
- %3:gr8 = MOV8rm %0, 1, $noreg, 0, $noreg :: (load 1 from `i32* undef`, align 4)
- %4:gr32 = COPY %1
- %5:gr32 = COPY %1
- %6:gr32 = COPY %1
- %7:gr32 = IMPLICIT_DEF
- %8:gr32 = IMPLICIT_DEF
- %8:gr32 = CMOV32rr %8, killed %7, 2, implicit undef $eflags
- %9:gr8 = MOV8ri -128
- %9:gr8 = COPY %3
- $cl = IMPLICIT_DEF
- %8:gr32 = COPY %1
- %8:gr32 = SHRD32rrCL %8, %1, implicit-def dead $eflags, implicit $cl
- RETL
-
-...
diff --git a/llvm/test/CodeGen/X86/crash-O0.ll b/llvm/test/CodeGen/X86/crash-O0.ll
index 54f7c7597e50..9f9e5584d6f2 100644
--- a/llvm/test/CodeGen/X86/crash-O0.ll
+++ b/llvm/test/CodeGen/X86/crash-O0.ll
@@ -16,15 +16,14 @@ define i32 @div8() nounwind {
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
-; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
; CHECK-NEXT: ## implicit-def: $rcx
; CHECK-NEXT: ## kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
; CHECK-NEXT: movzbw %al, %ax
; CHECK-NEXT: divb %cl
-; CHECK-NEXT: movb %al, %dl
-; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al ## 1-byte Reload
-; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
-; CHECK-NEXT: movzbw %al, %ax
+; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %dl ## 1-byte Reload
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
+; CHECK-NEXT: movzbw %dl, %ax
; CHECK-NEXT: divb %cl
; CHECK-NEXT: shrw $8, %ax
; CHECK-NEXT: ## kill: def $al killed $al killed $ax
@@ -32,11 +31,11 @@ define i32 @div8() nounwind {
; CHECK-NEXT: jae LBB0_2
; CHECK-NEXT: ## %bb.1: ## %"39"
; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al ## 1-byte Reload
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: ## implicit-def: $ecx
-; CHECK-NEXT: imull %ecx, %eax
-; CHECK-NEXT: addl %ecx, %eax
-; CHECK-NEXT: cmpl %ecx, %eax
+; CHECK-NEXT: movzbl %al, %ecx
+; CHECK-NEXT: ## implicit-def: $edx
+; CHECK-NEXT: imull %edx, %ecx
+; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: cmpl %edx, %ecx
; CHECK-NEXT: je LBB0_3
; CHECK-NEXT: LBB0_2: ## %"40"
; CHECK-NEXT: ud2
@@ -80,11 +79,12 @@ define i64 @addressModeWith32bitIndex(i32 %V) {
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: ## kill: def $rax killed $eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: cqto
-; CHECK-NEXT: movslq %edi, %rsi
-; CHECK-NEXT: idivq (%rcx,%rsi,8)
+; CHECK-NEXT: movslq %edi, %rcx
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload
+; CHECK-NEXT: idivq (%rsi,%rcx,8)
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%gep = getelementptr i64, i64* null, i32 %V
diff --git a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll
index a66b74a19066..664d9ded1e0e 100644
--- a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll
+++ b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll
@@ -8,8 +8,7 @@ bb:
%tmp = load i32, i32* %p, align 4, !dbg !7
; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p)
; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7
- ; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -8, $noreg, $rax :: (store 8 into %stack.0)
- ; CHECK-NEXT: SUB64ri8 renamable $rax, 3, implicit-def $eflags, debug-location !7
+ ; CHECK-NEXT: $rcx = MOV64rr $rax, debug-location !7
switch i32 %tmp, label %bb7 [
i32 0, label %bb1
diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll
index 9a54c8711f37..e262448468eb 100644
--- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll
@@ -19,7 +19,7 @@ exit:
; different basic block, so its operands aren't necessarily exported
; for cross-block usage.
-; CHECK: movb %cl, [[OFS:[0-9]*]](%rsp)
+; CHECK: movb %al, [[OFS:[0-9]*]](%rsp)
; CHECK: callq {{_?}}bar
; CHECK: movb [[OFS]](%rsp), %al
diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
index 56c2812481ca..7fffa21f0d24 100644
--- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -586,11 +586,11 @@ define <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xfloat:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xfloat:
@@ -628,11 +628,11 @@ define <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt4xdouble:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt4xdouble:
@@ -670,11 +670,11 @@ define <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt32xi8:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt32xi8:
@@ -712,11 +712,11 @@ define <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt16xi16:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xi16:
@@ -754,11 +754,11 @@ define <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xi32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xi32:
@@ -796,11 +796,11 @@ define <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt4xi64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt4xi64:
@@ -889,7 +889,6 @@ define void @test_nt64xi8(<64 x i8>* nocapture %ptr, <64 x i8> %X) {
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
-
; AVX512-LABEL: test_nt64xi8:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovntdq %zmm0, (%rdi)
@@ -916,7 +915,6 @@ define void @test_nt32xi16(<32 x i16>* nocapture %ptr, <32 x i16> %X) {
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
-
; AVX512-LABEL: test_nt32xi16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovntdq %zmm0, (%rdi)
@@ -1010,16 +1008,16 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt16xfloat:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xfloat:
@@ -1064,16 +1062,16 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xdouble:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xdouble:
@@ -1118,16 +1116,16 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt64xi8:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt64xi8:
@@ -1172,16 +1170,16 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt32xi16:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt32xi16:
@@ -1226,16 +1224,16 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt16xi32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xi32:
@@ -1280,16 +1278,16 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xi64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm0
-; AVX1-NEXT: vmovaps %xmm1, %xmm0
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xi64:
diff --git a/llvm/test/CodeGen/X86/fast-isel-select-sse.ll b/llvm/test/CodeGen/X86/fast-isel-select-sse.ll
index 6f3643436e65..17d2803e9ce1 100644
--- a/llvm/test/CodeGen/X86/fast-isel-select-sse.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-select-sse.ll
@@ -65,15 +65,12 @@ define double @select_fcmp_oeq_f64(double %a, double %b, double %c, double %d) {
define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) {
; SSE-LABEL: select_fcmp_ogt_f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
-; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: cmpltss %xmm1, %xmm0
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: andps %xmm2, %xmm1
-; SSE-NEXT: andnps %xmm3, %xmm0
-; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: cmpltss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: andps %xmm2, %xmm0
+; SSE-NEXT: andnps %xmm3, %xmm1
+; SSE-NEXT: orps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ogt_f32:
@@ -96,15 +93,12 @@ define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) {
define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) {
; SSE-LABEL: select_fcmp_ogt_f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; SSE-NEXT: # xmm0 = mem[0],zero
-; SSE-NEXT: cmpltsd %xmm1, %xmm0
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: andpd %xmm2, %xmm1
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm1, %xmm0
+; SSE-NEXT: cmpltsd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: andpd %xmm2, %xmm0
+; SSE-NEXT: andnpd %xmm3, %xmm1
+; SSE-NEXT: orpd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ogt_f64:
@@ -127,15 +121,12 @@ define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) {
define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) {
; SSE-LABEL: select_fcmp_oge_f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
-; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: cmpless %xmm1, %xmm0
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: andps %xmm2, %xmm1
-; SSE-NEXT: andnps %xmm3, %xmm0
-; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: cmpless %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: andps %xmm2, %xmm0
+; SSE-NEXT: andnps %xmm3, %xmm1
+; SSE-NEXT: orps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_oge_f32:
@@ -158,15 +149,12 @@ define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) {
define double @select_fcmp_oge_f64(double %a, double %b, double %c, double %d) {
; SSE-LABEL: select_fcmp_oge_f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; SSE-NEXT: # xmm0 = mem[0],zero
-; SSE-NEXT: cmplesd %xmm1, %xmm0
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: andpd %xmm2, %xmm1
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm1, %xmm0
+; SSE-NEXT: cmplesd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: andpd %xmm2, %xmm0
+; SSE-NEXT: andnpd %xmm3, %xmm1
+; SSE-NEXT: orpd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_oge_f64:
@@ -513,15 +501,12 @@ define double @select_fcmp_uge_f64(double %a, double %b, double %c, double %d) {
define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) {
; SSE-LABEL: select_fcmp_ult_f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
-; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: cmpnless %xmm1, %xmm0
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: andps %xmm2, %xmm1
-; SSE-NEXT: andnps %xmm3, %xmm0
-; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: cmpnless %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: andps %xmm2, %xmm0
+; SSE-NEXT: andnps %xmm3, %xmm1
+; SSE-NEXT: orps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ult_f32:
@@ -544,15 +529,12 @@ define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) {
define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) {
; SSE-LABEL: select_fcmp_ult_f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; SSE-NEXT: # xmm0 = mem[0],zero
-; SSE-NEXT: cmpnlesd %xmm1, %xmm0
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: andpd %xmm2, %xmm1
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm1, %xmm0
+; SSE-NEXT: cmpnlesd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: andpd %xmm2, %xmm0
+; SSE-NEXT: andnpd %xmm3, %xmm1
+; SSE-NEXT: orpd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ult_f64:
@@ -575,15 +557,12 @@ define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) {
define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) {
; SSE-LABEL: select_fcmp_ule_f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
-; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: cmpnltss %xmm1, %xmm0
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: andps %xmm2, %xmm1
-; SSE-NEXT: andnps %xmm3, %xmm0
-; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: cmpnltss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: andps %xmm2, %xmm0
+; SSE-NEXT: andnps %xmm3, %xmm1
+; SSE-NEXT: orps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ule_f32:
@@ -606,15 +585,12 @@ define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) {
define double @select_fcmp_ule_f64(double %a, double %b, double %c, double %d) {
; SSE-LABEL: select_fcmp_ule_f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; SSE-NEXT: # xmm0 = mem[0],zero
-; SSE-NEXT: cmpnltsd %xmm1, %xmm0
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: andpd %xmm2, %xmm1
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm1, %xmm0
+; SSE-NEXT: cmpnltsd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: andpd %xmm2, %xmm0
+; SSE-NEXT: andnpd %xmm3, %xmm1
+; SSE-NEXT: orpd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ule_f64:
diff --git a/llvm/test/CodeGen/X86/fast-isel-select.ll b/llvm/test/CodeGen/X86/fast-isel-select.ll
index 5f65dde68a4a..7865f9958ec5 100644
--- a/llvm/test/CodeGen/X86/fast-isel-select.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-select.ll
@@ -9,11 +9,11 @@
define i32 @fastisel_select(i1 %exchSub2211_, i1 %trunc_8766) {
; CHECK-LABEL: fastisel_select:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movb %sil, %dl
-; CHECK-NEXT: movb %dil, %cl
+; CHECK-NEXT: ## kill: def $sil killed $sil killed $esi
+; CHECK-NEXT: ## kill: def $dil killed $dil killed $edi
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: subb %dl, %cl
-; CHECK-NEXT: testb $1, %cl
+; CHECK-NEXT: subb %sil, %dil
+; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: movl $1204476887, %ecx ## imm = 0x47CADBD7
; CHECK-NEXT: cmovnel %ecx, %eax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/fast-isel-x86-64.ll b/llvm/test/CodeGen/X86/fast-isel-x86-64.ll
index 5d3c07fb46c3..30c8af288ac5 100644
--- a/llvm/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -299,8 +299,8 @@ define void @test23(i8* noalias sret %result) {
; CHECK-LABEL: test23:
; CHECK: movq %rdi, [[STACK:[0-9]+\(%rsp\)]]
; CHECK: call
-; CHECK-NEXT: movq [[STACK]], %rax
-; CHECK-NEXT: addq $24, %rsp
+; CHECK: movq [[STACK]], %rcx
+; CHECK: movq %rcx, %rax
; CHECK: ret
}
diff --git a/llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll b/llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll
index 8392e3ed43f2..14a233ed7fd4 100644
--- a/llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll
+++ b/llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll
@@ -61,12 +61,12 @@ define dso_local void @test_sign_ext(%struct.Foo* %f, i32* %i) {
;
; CHECK-O0-LABEL: test_sign_ext:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl %edx, %ecx
-; CHECK-O0-NEXT: sarl $31, %ecx
-; CHECK-O0-NEXT: movl %edx, 8(%eax)
-; CHECK-O0-NEXT: movl %ecx, 12(%eax)
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-O0-NEXT: movl %eax, %edx
+; CHECK-O0-NEXT: sarl $31, %edx
+; CHECK-O0-NEXT: movl %eax, 8(%ecx)
+; CHECK-O0-NEXT: movl %edx, 12(%ecx)
; CHECK-O0-NEXT: jmp _use_foo # TAILCALL
entry:
%0 = addrspacecast i32* %i to i32 addrspace(272)*
@@ -77,21 +77,13 @@ entry:
}
define dso_local void @test_zero_ext(%struct.Foo* %f, i32 addrspace(271)* %i) {
-; CHECK-LABEL: test_zero_ext:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %eax, 8(%ecx)
-; CHECK-NEXT: movl $0, 12(%ecx)
-; CHECK-NEXT: jmp _use_foo # TAILCALL
-;
-; CHECK-O0-LABEL: test_zero_ext:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl %ecx, 8(%eax)
-; CHECK-O0-NEXT: movl $0, 12(%eax)
-; CHECK-O0-NEXT: jmp _use_foo # TAILCALL
+; ALL-LABEL: test_zero_ext:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; ALL-NEXT: movl %eax, 8(%ecx)
+; ALL-NEXT: movl $0, 12(%ecx)
+; ALL-NEXT: jmp _use_foo # TAILCALL
entry:
%0 = addrspacecast i32 addrspace(271)* %i to i32 addrspace(272)*
%p64 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 1
@@ -110,10 +102,13 @@ define dso_local void @test_trunc(%struct.Foo* %f, i32 addrspace(272)* %i) {
;
; CHECK-O0-LABEL: test_trunc:
; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: pushl %eax
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl %ecx, (%eax)
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-O0-NEXT: movl %ecx, (%edx)
+; CHECK-O0-NEXT: movl %eax, (%esp) # 4-byte Spill
+; CHECK-O0-NEXT: popl %eax
; CHECK-O0-NEXT: jmp _use_foo # TAILCALL
entry:
%0 = addrspacecast i32 addrspace(272)* %i to i32*
@@ -124,19 +119,12 @@ entry:
}
define dso_local void @test_noop1(%struct.Foo* %f, i32* %i) {
-; CHECK-LABEL: test_noop1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %eax, (%ecx)
-; CHECK-NEXT: jmp _use_foo # TAILCALL
-;
-; CHECK-O0-LABEL: test_noop1:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl %ecx, (%eax)
-; CHECK-O0-NEXT: jmp _use_foo # TAILCALL
+; ALL-LABEL: test_noop1:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; ALL-NEXT: movl %eax, (%ecx)
+; ALL-NEXT: jmp _use_foo # TAILCALL
entry:
%p32 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 0
store i32* %i, i32** %p32, align 8
@@ -156,11 +144,11 @@ define dso_local void @test_noop2(%struct.Foo* %f, i32 addrspace(272)* %i) {
;
; CHECK-O0-LABEL: test_noop2:
; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl %edx, 8(%eax)
-; CHECK-O0-NEXT: movl %ecx, 12(%eax)
+; CHECK-O0-NEXT: movl %ecx, 8(%edx)
+; CHECK-O0-NEXT: movl %eax, 12(%edx)
; CHECK-O0-NEXT: jmp _use_foo # TAILCALL
entry:
%p64 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 1
@@ -183,11 +171,11 @@ define dso_local void @test_null_arg(%struct.Foo* %f) {
; CHECK-O0-LABEL: test_null_arg:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: subl $12, %esp
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-O0-NEXT: movl %esp, %eax
-; CHECK-O0-NEXT: movl %ecx, (%eax)
-; CHECK-O0-NEXT: movl $0, 8(%eax)
-; CHECK-O0-NEXT: movl $0, 4(%eax)
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-O0-NEXT: movl %esp, %ecx
+; CHECK-O0-NEXT: movl %eax, (%ecx)
+; CHECK-O0-NEXT: movl $0, 8(%ecx)
+; CHECK-O0-NEXT: movl $0, 4(%ecx)
; CHECK-O0-NEXT: calll _test_noop2
; CHECK-O0-NEXT: addl $12, %esp
; CHECK-O0-NEXT: retl
@@ -208,12 +196,12 @@ define dso_local void @test_unrecognized(%struct.Foo* %f, i32 addrspace(14)* %i)
;
; CHECK-O0-LABEL: test_unrecognized:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl %edx, %ecx
-; CHECK-O0-NEXT: sarl $31, %ecx
-; CHECK-O0-NEXT: movl %edx, 8(%eax)
-; CHECK-O0-NEXT: movl %ecx, 12(%eax)
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-O0-NEXT: movl %eax, %edx
+; CHECK-O0-NEXT: sarl $31, %edx
+; CHECK-O0-NEXT: movl %eax, 8(%ecx)
+; CHECK-O0-NEXT: movl %edx, 12(%ecx)
; CHECK-O0-NEXT: jmp _use_foo # TAILCALL
entry:
%0 = addrspacecast i32 addrspace(14)* %i to i32 addrspace(272)*
@@ -233,10 +221,13 @@ define dso_local void @test_unrecognized2(%struct.Foo* %f, i32 addrspace(272)* %
;
; CHECK-O0-LABEL: test_unrecognized2:
; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: pushl %eax
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl %ecx, 16(%eax)
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-O0-NEXT: movl %ecx, 16(%edx)
+; CHECK-O0-NEXT: movl %eax, (%esp) # 4-byte Spill
+; CHECK-O0-NEXT: popl %eax
; CHECK-O0-NEXT: jmp _use_foo # TAILCALL
entry:
%0 = addrspacecast i32 addrspace(272)* %i to i32 addrspace(9)*
@@ -247,22 +238,32 @@ entry:
}
define i32 @test_load_sptr32(i32 addrspace(270)* %i) {
-; ALL-LABEL: test_load_sptr32:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; ALL-NEXT: movl (%eax), %eax
-; ALL-NEXT: retl
+; CHECK-LABEL: test_load_sptr32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl (%eax), %eax
+; CHECK-NEXT: retl
+; CHECK-O0-LABEL: test_load_sptr32:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-O0-NEXT: movl (%eax), %eax
+; CHECK-O0-NEXT: retl
entry:
%0 = load i32, i32 addrspace(270)* %i, align 4
ret i32 %0
}
define i32 @test_load_uptr32(i32 addrspace(271)* %i) {
-; ALL-LABEL: test_load_uptr32:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; ALL-NEXT: movl (%eax), %eax
-; ALL-NEXT: retl
+; CHECK-LABEL: test_load_uptr32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl (%eax), %eax
+; CHECK-NEXT: retl
+; CHECK-O0-LABEL: test_load_uptr32:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-O0-NEXT: movl (%eax), %eax
+; CHECK-O0-NEXT: retl
entry:
%0 = load i32, i32 addrspace(271)* %i, align 4
ret i32 %0
@@ -274,12 +275,15 @@ define i32 @test_load_ptr64(i32 addrspace(272)* %i) {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
-;
; CHECK-O0-LABEL: test_load_ptr64:
; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: pushl %eax
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl (%eax), %eax
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-O0-NEXT: movl (%ecx), %ecx
+; CHECK-O0-NEXT: movl %eax, (%esp)
+; CHECK-O0-NEXT: movl %ecx, %eax
+; CHECK-O0-NEXT: popl %ecx
; CHECK-O0-NEXT: retl
entry:
%0 = load i32, i32 addrspace(272)* %i, align 8
@@ -293,12 +297,11 @@ define void @test_store_sptr32(i32 addrspace(270)* %s, i32 %i) {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %eax, (%ecx)
; CHECK-NEXT: retl
-;
; CHECK-O0-LABEL: test_store_sptr32:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl %ecx, (%eax)
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-O0-NEXT: movl %eax, (%ecx)
; CHECK-O0-NEXT: retl
entry:
store i32 %i, i32 addrspace(270)* %s, align 4
@@ -312,12 +315,11 @@ define void @test_store_uptr32(i32 addrspace(271)* %s, i32 %i) {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %eax, (%ecx)
; CHECK-NEXT: retl
-;
; CHECK-O0-LABEL: test_store_uptr32:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl %ecx, (%eax)
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-O0-NEXT: movl %eax, (%ecx)
; CHECK-O0-NEXT: retl
entry:
store i32 %i, i32 addrspace(271)* %s, align 4
@@ -331,13 +333,12 @@ define void @test_store_ptr64(i32 addrspace(272)* %s, i32 %i) {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %eax, (%ecx)
; CHECK-NEXT: retl
-;
; CHECK-O0-LABEL: test_store_ptr64:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-O0-NEXT: movl %ecx, (%eax)
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-O0-NEXT: movl %edx, (%ecx)
; CHECK-O0-NEXT: retl
entry:
store i32 %i, i32 addrspace(272)* %s, align 8
diff --git a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
index 76f775b834e0..b452606484b6 100644
--- a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
+++ b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
@@ -88,8 +88,8 @@ define dso_local void @test_trunc(%struct.Foo* %f, i32* %i) {
;
; CHECK-O0-LABEL: test_trunc:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movl %edx, %eax
-; CHECK-O0-NEXT: movl %eax, (%rcx)
+; CHECK-O0-NEXT: # kill: def $edx killed $edx killed $rdx
+; CHECK-O0-NEXT: movl %edx, (%rcx)
; CHECK-O0-NEXT: jmp use_foo # TAILCALL
entry:
%0 = addrspacecast i32* %i to i32 addrspace(270)*
@@ -150,8 +150,8 @@ define void @test_unrecognized(%struct.Foo* %f, i32 addrspace(14)* %i) {
;
; CHECK-O0-LABEL: test_unrecognized:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movl %edx, %eax
-; CHECK-O0-NEXT: movl %eax, (%rcx)
+; CHECK-O0-NEXT: # kill: def $edx killed $edx killed $rdx
+; CHECK-O0-NEXT: movl %edx, (%rcx)
; CHECK-O0-NEXT: jmp use_foo # TAILCALL
entry:
%0 = addrspacecast i32 addrspace(14)* %i to i32 addrspace(270)*
@@ -183,11 +183,16 @@ entry:
}
define i32 @test_load_sptr32(i32 addrspace(270)* %i) {
-; ALL-LABEL: test_load_sptr32:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: movslq %ecx, %rax
-; ALL-NEXT: movl (%rax), %eax
-; ALL-NEXT: retq
+; CHECK-LABEL: test_load_sptr32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movslq %ecx, %rax
+; CHECK-NEXT: movl (%rax), %eax
+; CHECK-NEXT: retq
+; CHECK-O0-LABEL: test_load_sptr32:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: movslq %ecx, %rax
+; CHECK-O0-NEXT: movl (%rax), %eax
+; CHECK-O0-NEXT: retq
entry:
%0 = load i32, i32 addrspace(270)* %i, align 4
ret i32 %0
@@ -199,7 +204,6 @@ define i32 @test_load_uptr32(i32 addrspace(271)* %i) {
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl (%rax), %eax
; CHECK-NEXT: retq
-;
; CHECK-O0-LABEL: test_load_uptr32:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: movl %ecx, %eax
@@ -212,21 +216,30 @@ entry:
}
define i32 @test_load_ptr64(i32 addrspace(272)* %i) {
-; ALL-LABEL: test_load_ptr64:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: movl (%rcx), %eax
-; ALL-NEXT: retq
+; CHECK-LABEL: test_load_ptr64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl (%rcx), %eax
+; CHECK-NEXT: retq
+; CHECK-O0-LABEL: test_load_ptr64:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: movl (%rcx), %eax
+; CHECK-O0-NEXT: retq
entry:
%0 = load i32, i32 addrspace(272)* %i, align 8
ret i32 %0
}
define void @test_store_sptr32(i32 addrspace(270)* %s, i32 %i) {
-; ALL-LABEL: test_store_sptr32:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: movslq %ecx, %rax
-; ALL-NEXT: movl %edx, (%rax)
-; ALL-NEXT: retq
+; CHECK-LABEL: test_store_sptr32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movslq %ecx, %rax
+; CHECK-NEXT: movl %edx, (%rax)
+; CHECK-NEXT: retq
+; CHECK-O0-LABEL: test_store_sptr32:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: movslq %ecx, %rax
+; CHECK-O0-NEXT: movl %edx, (%rax)
+; CHECK-O0-NEXT: retq
entry:
store i32 %i, i32 addrspace(270)* %s, align 4
ret void
@@ -238,7 +251,6 @@ define void @test_store_uptr32(i32 addrspace(271)* %s, i32 %i) {
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, (%rax)
; CHECK-NEXT: retq
-;
; CHECK-O0-LABEL: test_store_uptr32:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: movl %ecx, %eax
@@ -251,10 +263,14 @@ entry:
}
define void @test_store_ptr64(i32 addrspace(272)* %s, i32 %i) {
-; ALL-LABEL: test_store_ptr64:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: movl %edx, (%rcx)
-; ALL-NEXT: retq
+; CHECK-LABEL: test_store_ptr64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %edx, (%rcx)
+; CHECK-NEXT: retq
+; CHECK-O0-LABEL: test_store_ptr64:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: movl %edx, (%rcx)
+; CHECK-O0-NEXT: retq
entry:
store i32 %i, i32 addrspace(272)* %s, align 8
ret void
diff --git a/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll b/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
index a1fc3f983176..2a129bc643b3 100644
--- a/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
+++ b/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck %s
; RUN: llc -O0 < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -no-x86-call-frame-opt | FileCheck %s
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck %s
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck -check-prefix=ATOM %s
+; CHECKed instructions should be the same with or without -O0 except on Intel Atom due to instruction scheduling.
@.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1]
@@ -16,6 +17,18 @@ entry:
; CHECK-NOT: movl
; CHECK: addl %ebx, %eax
+; On Intel Atom the scheduler moves a movl instruction
+; used for the printf call to follow movl 24(%esp), %eax
+; ATOM: movl 24(%esp), %eax
+; ATOM-NOT: movl
+; ATOM: movl %eax, 36(%esp)
+; ATOM: movl
+; ATOM: movl 28(%esp), %ebx
+; ATOM-NOT: movl
+; ATOM: movl %ebx, 40(%esp)
+; ATOM-NOT: movl
+; ATOM: addl %ebx, %eax
+
%retval = alloca i32 ; <i32*> [#uses=2]
%"%ebx" = alloca i32 ; <i32*> [#uses=1]
%"%eax" = alloca i32 ; <i32*> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/pr11415.ll b/llvm/test/CodeGen/X86/pr11415.ll
index ee632189ef9c..b3d9b2ff4839 100644
--- a/llvm/test/CodeGen/X86/pr11415.ll
+++ b/llvm/test/CodeGen/X86/pr11415.ll
@@ -6,11 +6,12 @@
; CHECK: #APP
; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movq %rcx, %rdx
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: movq %rax, -8(%rsp)
+; CHECK-NEXT: movq -8(%rsp), %rdx
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movq %rcx, -8(%rsp)
-; CHECK-NEXT: movq -8(%rsp), %rax
+; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: ret
define i64 @foo() {
diff --git a/llvm/test/CodeGen/X86/pr1489.ll b/llvm/test/CodeGen/X86/pr1489.ll
index 978164fdafdb..d1148eecb0da 100644
--- a/llvm/test/CodeGen/X86/pr1489.ll
+++ b/llvm/test/CodeGen/X86/pr1489.ll
@@ -110,25 +110,28 @@ define i32 @main() nounwind {
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: subl $32, %esp
+; CHECK-NEXT: subl $48, %esp
; CHECK-NEXT: calll _baz
-; CHECK-NEXT: movl %eax, %edi
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: calll _bar
-; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: calll _foo
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: calll _quux
+; CHECK-NEXT: movl %esp, %ecx
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: movl %edi, 16(%eax)
-; CHECK-NEXT: movl %esi, 12(%eax)
-; CHECK-NEXT: movl %edx, 8(%eax)
-; CHECK-NEXT: movl %ecx, 4(%eax)
-; CHECK-NEXT: movl $_.str, (%eax)
+; CHECK-NEXT: movl %edx, 16(%ecx)
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; CHECK-NEXT: movl %esi, 12(%ecx)
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; CHECK-NEXT: movl %edi, 8(%ecx)
+; CHECK-NEXT: movl %eax, 4(%ecx)
+; CHECK-NEXT: movl $_.str, (%ecx)
; CHECK-NEXT: calll _printf
-; CHECK-NEXT: ## implicit-def: $eax
-; CHECK-NEXT: addl $32, %esp
+; CHECK-NEXT: ## implicit-def: $ecx
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: addl $48, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebp
diff --git a/llvm/test/CodeGen/X86/pr27591.ll b/llvm/test/CodeGen/X86/pr27591.ll
index a925bb8dfd6a..7455584ac698 100644
--- a/llvm/test/CodeGen/X86/pr27591.ll
+++ b/llvm/test/CodeGen/X86/pr27591.ll
@@ -9,8 +9,9 @@ define void @test1(i32 %x) #0 {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: cmpl $0, %edi
; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %edi
-; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: movl %eax, %edi
; CHECK-NEXT: callq callee1
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
@@ -26,9 +27,10 @@ define void @test2(i32 %x) #0 {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: cmpl $0, %edi
; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %edi
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: negl %edi
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: negl %eax
+; CHECK-NEXT: movl %eax, %edi
; CHECK-NEXT: callq callee2
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/pr30430.ll b/llvm/test/CodeGen/X86/pr30430.ll
index 7f771c955fbe..e524245daa11 100644
--- a/llvm/test/CodeGen/X86/pr30430.ll
+++ b/llvm/test/CodeGen/X86/pr30430.ll
@@ -12,13 +12,13 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
; CHECK-NEXT: andq $-64, %rsp
; CHECK-NEXT: subq $256, %rsp # imm = 0x100
; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
@@ -27,75 +27,75 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm4, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm4, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm16, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm17, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm18, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm19, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm20, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm21, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm22, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[0]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[0]
-; CHECK-NEXT: # implicit-def: $ymm0
-; CHECK-NEXT: vmovaps %xmm2, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; CHECK-NEXT: # implicit-def: $ymm2
+; CHECK-NEXT: vmovaps %xmm1, %xmm2
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[0]
; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm0[0,1,2],xmm3[0]
-; CHECK-NEXT: # implicit-def: $ymm0
-; CHECK-NEXT: vmovaps %xmm3, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
-; CHECK-NEXT: # implicit-def: $zmm0
-; CHECK-NEXT: vmovaps %ymm2, %ymm0
-; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
+; CHECK-NEXT: # implicit-def: $ymm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; CHECK-NEXT: # implicit-def: $zmm2
+; CHECK-NEXT: vmovaps %ymm1, %ymm2
+; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
; CHECK-NEXT: movq %rbp, %rsp
diff --git a/llvm/test/CodeGen/X86/pr30813.ll b/llvm/test/CodeGen/X86/pr30813.ll
index e3e096bda6c2..7266c5bd8d01 100644
--- a/llvm/test/CodeGen/X86/pr30813.ll
+++ b/llvm/test/CodeGen/X86/pr30813.ll
@@ -1,9 +1,8 @@
; RUN: llc -mtriple=x86_64-linux-gnu -O0 %s -o - | FileCheck %s
; CHECK: patatino:
; CHECK: .cfi_startproc
-; CHECK: movzwl (%rax), [[REG0:%e[abcd]x]]
-; CHECK: movl [[REG0]], %e[[REG1C:[abcd]]]x
-; CHECK: movq %r[[REG1C]]x, ({{%r[abcd]x}})
+; CHECK: movzwl (%rax), %e[[REG0:[abcd]x]]
+; CHECK: movq %r[[REG0]], ({{%r[abcd]x}})
; CHECK: retq
define void @patatino() {
diff --git a/llvm/test/CodeGen/X86/pr32241.ll b/llvm/test/CodeGen/X86/pr32241.ll
index 6fb770b4a75e..1f3d273dfc41 100644
--- a/llvm/test/CodeGen/X86/pr32241.ll
+++ b/llvm/test/CodeGen/X86/pr32241.ll
@@ -10,10 +10,10 @@ define i32 @_Z3foov() {
; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376
; CHECK-NEXT: movw $19417, {{[0-9]+}}(%esp) # imm = 0x4BD9
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: cmpw $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT: movb $1, %cl
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1: # %lor.rhs
; CHECK-NEXT: xorl %eax, %eax
@@ -21,11 +21,11 @@ define i32 @_Z3foov() {
; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: jmp .LBB0_2
; CHECK-NEXT: .LBB0_2: # %lor.end
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
-; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %ecx
-; CHECK-NEXT: cmpl %ecx, %eax
+; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT: cmpl %eax, %ecx
; CHECK-NEXT: setl %al
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: movzbl %al, %eax
diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll
index cb9b33ca907b..533473663d73 100644
--- a/llvm/test/CodeGen/X86/pr32284.ll
+++ b/llvm/test/CodeGen/X86/pr32284.ll
@@ -178,8 +178,17 @@ define void @f1() {
;
; 686-O0-LABEL: f1:
; 686-O0: # %bb.0: # %entry
+; 686-O0-NEXT: pushl %ebx
+; 686-O0-NEXT: .cfi_def_cfa_offset 8
+; 686-O0-NEXT: pushl %edi
+; 686-O0-NEXT: .cfi_def_cfa_offset 12
+; 686-O0-NEXT: pushl %esi
+; 686-O0-NEXT: .cfi_def_cfa_offset 16
; 686-O0-NEXT: subl $1, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 5
+; 686-O0-NEXT: .cfi_def_cfa_offset 17
+; 686-O0-NEXT: .cfi_offset %esi, -16
+; 686-O0-NEXT: .cfi_offset %edi, -12
+; 686-O0-NEXT: .cfi_offset %ebx, -8
; 686-O0-NEXT: movl var_5, %eax
; 686-O0-NEXT: movl %eax, %ecx
; 686-O0-NEXT: sarl $31, %ecx
@@ -188,27 +197,33 @@ define void @f1() {
; 686-O0-NEXT: orl %ecx, %eax
; 686-O0-NEXT: setne (%esp)
; 686-O0-NEXT: movl var_5, %ecx
-; 686-O0-NEXT: movl %ecx, %eax
-; 686-O0-NEXT: sarl $31, %eax
; 686-O0-NEXT: movl %ecx, %edx
-; 686-O0-NEXT: subl $-1, %edx
-; 686-O0-NEXT: sete %dl
-; 686-O0-NEXT: movzbl %dl, %edx
+; 686-O0-NEXT: sarl $31, %edx
+; 686-O0-NEXT: movl %ecx, %esi
+; 686-O0-NEXT: subl $-1, %esi
+; 686-O0-NEXT: sete %bl
+; 686-O0-NEXT: movzbl %bl, %edi
; 686-O0-NEXT: addl $7093, %ecx # imm = 0x1BB5
-; 686-O0-NEXT: adcl $0, %eax
-; 686-O0-NEXT: subl %edx, %ecx
-; 686-O0-NEXT: sbbl $0, %eax
-; 686-O0-NEXT: setl %al
-; 686-O0-NEXT: movzbl %al, %eax
-; 686-O0-NEXT: movl %eax, var_57
+; 686-O0-NEXT: adcl $0, %edx
+; 686-O0-NEXT: subl %edi, %ecx
+; 686-O0-NEXT: sbbl $0, %edx
+; 686-O0-NEXT: setl %bl
+; 686-O0-NEXT: movzbl %bl, %edi
+; 686-O0-NEXT: movl %edi, var_57
; 686-O0-NEXT: movl $0, var_57+4
-; 686-O0-NEXT: movl var_5, %eax
-; 686-O0-NEXT: subl $-1, %eax
-; 686-O0-NEXT: sete %al
-; 686-O0-NEXT: movzbl %al, %eax
-; 686-O0-NEXT: movl %eax, _ZN8struct_210member_2_0E
+; 686-O0-NEXT: movl var_5, %edi
+; 686-O0-NEXT: subl $-1, %edi
+; 686-O0-NEXT: sete %bl
+; 686-O0-NEXT: movzbl %bl, %ebx
+; 686-O0-NEXT: movl %ebx, _ZN8struct_210member_2_0E
; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4
; 686-O0-NEXT: addl $1, %esp
+; 686-O0-NEXT: .cfi_def_cfa_offset 16
+; 686-O0-NEXT: popl %esi
+; 686-O0-NEXT: .cfi_def_cfa_offset 12
+; 686-O0-NEXT: popl %edi
+; 686-O0-NEXT: .cfi_def_cfa_offset 8
+; 686-O0-NEXT: popl %ebx
; 686-O0-NEXT: .cfi_def_cfa_offset 4
; 686-O0-NEXT: retl
;
@@ -306,9 +321,9 @@ define void @f2() {
; X86-O0-NEXT: sete %al
; X86-O0-NEXT: andb $1, %al
; X86-O0-NEXT: movzbl %al, %eax
-; X86-O0-NEXT: movw %ax, %cx
-; X86-O0-NEXT: # implicit-def: $rax
-; X86-O0-NEXT: movw %cx, (%rax)
+; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-O0-NEXT: # implicit-def: $rcx
+; X86-O0-NEXT: movw %ax, (%rcx)
; X86-O0-NEXT: retq
;
; X64-LABEL: f2:
@@ -353,9 +368,9 @@ define void @f2() {
; 686-O0-NEXT: sete %al
; 686-O0-NEXT: andb $1, %al
; 686-O0-NEXT: movzbl %al, %eax
-; 686-O0-NEXT: movw %ax, %cx
-; 686-O0-NEXT: # implicit-def: $eax
-; 686-O0-NEXT: movw %cx, (%eax)
+; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; 686-O0-NEXT: # implicit-def: $ecx
+; 686-O0-NEXT: movw %ax, (%ecx)
; 686-O0-NEXT: addl $2, %esp
; 686-O0-NEXT: .cfi_def_cfa_offset 4
; 686-O0-NEXT: retl
@@ -473,18 +488,18 @@ define void @f3() #0 {
; 686-O0-NEXT: andl $-8, %esp
; 686-O0-NEXT: subl $16, %esp
; 686-O0-NEXT: .cfi_offset %esi, -12
-; 686-O0-NEXT: movl var_13, %ecx
-; 686-O0-NEXT: movl %ecx, %eax
-; 686-O0-NEXT: notl %eax
-; 686-O0-NEXT: testl %ecx, %ecx
-; 686-O0-NEXT: sete %cl
-; 686-O0-NEXT: movzbl %cl, %ecx
-; 686-O0-NEXT: movl var_16, %esi
-; 686-O0-NEXT: movl %eax, %edx
-; 686-O0-NEXT: xorl %esi, %edx
-; 686-O0-NEXT: andl %edx, %ecx
-; 686-O0-NEXT: orl %ecx, %eax
-; 686-O0-NEXT: movl %eax, (%esp)
+; 686-O0-NEXT: movl var_13, %eax
+; 686-O0-NEXT: movl %eax, %ecx
+; 686-O0-NEXT: notl %ecx
+; 686-O0-NEXT: testl %eax, %eax
+; 686-O0-NEXT: sete %al
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movl var_16, %edx
+; 686-O0-NEXT: movl %ecx, %esi
+; 686-O0-NEXT: xorl %edx, %esi
+; 686-O0-NEXT: andl %esi, %eax
+; 686-O0-NEXT: orl %eax, %ecx
+; 686-O0-NEXT: movl %ecx, (%esp)
; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp)
; 686-O0-NEXT: movl var_13, %eax
; 686-O0-NEXT: notl %eax
diff --git a/llvm/test/CodeGen/X86/pr32340.ll b/llvm/test/CodeGen/X86/pr32340.ll
index 15774d605e62..98685b959f64 100644
--- a/llvm/test/CodeGen/X86/pr32340.ll
+++ b/llvm/test/CodeGen/X86/pr32340.ll
@@ -16,26 +16,26 @@ define void @foo() {
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: # kill: def $rax killed $eax
; X64-NEXT: movw $0, var_825
-; X64-NEXT: movzwl var_32, %edx
-; X64-NEXT: movzwl var_901, %ecx
-; X64-NEXT: movl %edx, %esi
-; X64-NEXT: xorl %ecx, %esi
-; X64-NEXT: movl %edx, %ecx
-; X64-NEXT: xorl %esi, %ecx
-; X64-NEXT: addl %edx, %ecx
-; X64-NEXT: movslq %ecx, %rcx
+; X64-NEXT: movzwl var_32, %ecx
+; X64-NEXT: movzwl var_901, %edx
+; X64-NEXT: movl %ecx, %esi
+; X64-NEXT: xorl %edx, %esi
+; X64-NEXT: movl %ecx, %edx
+; X64-NEXT: xorl %esi, %edx
+; X64-NEXT: addl %ecx, %edx
+; X64-NEXT: movslq %edx, %rcx
; X64-NEXT: movq %rcx, var_826
; X64-NEXT: movzwl var_32, %ecx
; X64-NEXT: # kill: def $rcx killed $ecx
; X64-NEXT: movzwl var_901, %edx
; X64-NEXT: xorl $51981, %edx # imm = 0xCB0D
-; X64-NEXT: movslq %edx, %rsi
-; X64-NEXT: movabsq $-1142377792914660288, %rdx # imm = 0xF02575732E06E440
-; X64-NEXT: xorq %rdx, %rsi
-; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: movslq %edx, %rdx
+; X64-NEXT: movabsq $-1142377792914660288, %rsi # imm = 0xF02575732E06E440
; X64-NEXT: xorq %rsi, %rdx
-; X64-NEXT: xorq $-1, %rdx
-; X64-NEXT: xorq %rdx, %rcx
+; X64-NEXT: movq %rcx, %rsi
+; X64-NEXT: xorq %rdx, %rsi
+; X64-NEXT: xorq $-1, %rsi
+; X64-NEXT: xorq %rsi, %rcx
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: orq var_57, %rdx
; X64-NEXT: orq %rdx, %rcx
diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll
index 2182e8b4f901..165e0292d464 100644
--- a/llvm/test/CodeGen/X86/pr32345.ll
+++ b/llvm/test/CodeGen/X86/pr32345.ll
@@ -29,9 +29,9 @@ define void @foo() {
; X640-NEXT: # kill: def $rcx killed $ecx
; X640-NEXT: # kill: def $cl killed $rcx
; X640-NEXT: sarq %cl, %rax
-; X640-NEXT: movb %al, %cl
-; X640-NEXT: # implicit-def: $rax
-; X640-NEXT: movb %cl, (%rax)
+; X640-NEXT: # kill: def $al killed $al killed $rax
+; X640-NEXT: # implicit-def: $rcx
+; X640-NEXT: movb %al, (%rcx)
; X640-NEXT: retq
;
; 6860-LABEL: foo:
@@ -43,44 +43,44 @@ define void @foo() {
; 6860-NEXT: .cfi_def_cfa_register %ebp
; 6860-NEXT: andl $-8, %esp
; 6860-NEXT: subl $24, %esp
-; 6860-NEXT: movw var_22, %dx
+; 6860-NEXT: movw var_22, %ax
; 6860-NEXT: movzwl var_27, %ecx
-; 6860-NEXT: movw %cx, %ax
-; 6860-NEXT: xorw %ax, %dx
-; 6860-NEXT: # implicit-def: $eax
-; 6860-NEXT: movw %dx, %ax
-; 6860-NEXT: xorl %ecx, %eax
-; 6860-NEXT: # kill: def $ax killed $ax killed $eax
-; 6860-NEXT: movzwl %ax, %eax
+; 6860-NEXT: movw %cx, %dx
+; 6860-NEXT: xorw %dx, %ax
+; 6860-NEXT: # implicit-def: $edx
+; 6860-NEXT: movw %ax, %dx
+; 6860-NEXT: xorl %ecx, %edx
+; 6860-NEXT: # kill: def $dx killed $dx killed $edx
+; 6860-NEXT: movzwl %dx, %eax
; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp)
; 6860-NEXT: movl $0, {{[0-9]+}}(%esp)
-; 6860-NEXT: movw var_22, %dx
-; 6860-NEXT: movzwl var_27, %eax
-; 6860-NEXT: movw %ax, %cx
-; 6860-NEXT: xorw %cx, %dx
-; 6860-NEXT: # implicit-def: $ecx
-; 6860-NEXT: movw %dx, %cx
-; 6860-NEXT: xorl %eax, %ecx
-; 6860-NEXT: # kill: def $cx killed $cx killed $ecx
-; 6860-NEXT: movzwl %cx, %edx
-; 6860-NEXT: movb %al, %cl
+; 6860-NEXT: movw var_22, %ax
+; 6860-NEXT: movzwl var_27, %ecx
+; 6860-NEXT: movw %cx, %dx
+; 6860-NEXT: xorw %dx, %ax
+; 6860-NEXT: # implicit-def: $edx
+; 6860-NEXT: movw %ax, %dx
+; 6860-NEXT: xorl %ecx, %edx
+; 6860-NEXT: # kill: def $dx killed $dx killed $edx
+; 6860-NEXT: movzwl %dx, %eax
+; 6860-NEXT: # kill: def $cl killed $cl killed $ecx
; 6860-NEXT: addb $30, %cl
+; 6860-NEXT: xorl %edx, %edx
; 6860-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; 6860-NEXT: xorl %eax, %eax
-; 6860-NEXT: shrdl %cl, %eax, %edx
+; 6860-NEXT: shrdl %cl, %edx, %eax
; 6860-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
-; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: testb $32, %cl
; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: jne .LBB0_2
; 6860-NEXT: # %bb.1: # %bb
; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: .LBB0_2: # %bb
; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; 6860-NEXT: movb %al, %cl
-; 6860-NEXT: # implicit-def: $eax
-; 6860-NEXT: movb %cl, (%eax)
+; 6860-NEXT: # kill: def $al killed $al killed $eax
+; 6860-NEXT: # implicit-def: $ecx
+; 6860-NEXT: movb %al, (%ecx)
; 6860-NEXT: movl %ebp, %esp
; 6860-NEXT: popl %ebp
; 6860-NEXT: .cfi_def_cfa %esp, 4
diff --git a/llvm/test/CodeGen/X86/pr32451.ll b/llvm/test/CodeGen/X86/pr32451.ll
index f12e85b9a177..3b1997234ce5 100644
--- a/llvm/test/CodeGen/X86/pr32451.ll
+++ b/llvm/test/CodeGen/X86/pr32451.ll
@@ -9,24 +9,24 @@ target triple = "x86_64-unknown-linux-gnu"
define i8** @japi1_convert_690(i8**, i8***, i32) {
; CHECK-LABEL: japi1_convert_690:
; CHECK: # %bb.0: # %top
-; CHECK-NEXT: subl $12, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: subl $16, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 20
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: calll julia.gc_root_decl
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: calll jl_get_ptls_states
-; CHECK-NEXT: # kill: def $ecx killed $eax
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; CHECK-NEXT: movl 4(%eax), %eax
-; CHECK-NEXT: movb (%eax), %al
-; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT: movl 4(%ecx), %edx
+; CHECK-NEXT: movb (%edx), %dl
+; CHECK-NEXT: andb $1, %dl
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: movl %edx, (%esp)
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: calll jl_box_int32
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %eax, (%ecx)
-; CHECK-NEXT: addl $12, %esp
+; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
top:
diff --git a/llvm/test/CodeGen/X86/pr32484.ll b/llvm/test/CodeGen/X86/pr32484.ll
index 0df1c4b54507..ef504eee6e8b 100644
--- a/llvm/test/CodeGen/X86/pr32484.ll
+++ b/llvm/test/CodeGen/X86/pr32484.ll
@@ -8,9 +8,9 @@ define void @foo() {
; CHECK-NEXT: jmpq *%rax
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: # implicit-def: $rax
-; CHECK-NEXT: movdqu %xmm0, (%rax)
+; CHECK-NEXT: movdqu %xmm1, (%rax)
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: retq
indirectbr i8* undef, [label %9, label %1]
diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll
index 3c5345bf3411..25b068c8fad6 100644
--- a/llvm/test/CodeGen/X86/pr34592.ll
+++ b/llvm/test/CodeGen/X86/pr34592.ll
@@ -10,42 +10,44 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
-; CHECK-NEXT: subq $32, %rsp
-; CHECK-NEXT: vmovaps %ymm4, %ymm10
-; CHECK-NEXT: vmovaps %ymm3, %ymm9
-; CHECK-NEXT: vmovaps %ymm1, %ymm8
-; CHECK-NEXT: vmovaps %ymm0, %ymm4
-; CHECK-NEXT: vmovaps 240(%rbp), %ymm1
-; CHECK-NEXT: vmovaps 208(%rbp), %ymm3
-; CHECK-NEXT: vmovaps 176(%rbp), %ymm0
-; CHECK-NEXT: vmovaps 144(%rbp), %ymm0
-; CHECK-NEXT: vmovaps 112(%rbp), %ymm11
-; CHECK-NEXT: vmovaps 80(%rbp), %ymm11
-; CHECK-NEXT: vmovaps 48(%rbp), %ymm11
-; CHECK-NEXT: vmovaps 16(%rbp), %ymm11
-; CHECK-NEXT: vpblendd {{.*#+}} ymm4 = ymm6[0,1,2,3,4,5],ymm2[6,7]
-; CHECK-NEXT: vmovaps %xmm3, %xmm8
-; CHECK-NEXT: # implicit-def: $ymm2
-; CHECK-NEXT: vinserti128 $1, %xmm8, %ymm2, %ymm2
-; CHECK-NEXT: vpalignr {{.*#+}} ymm0 = ymm4[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm4[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,0]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5],ymm0[6,7]
+; CHECK-NEXT: subq $160, %rsp
+; CHECK-NEXT: vmovaps 240(%rbp), %ymm8
+; CHECK-NEXT: vmovaps 208(%rbp), %ymm9
+; CHECK-NEXT: vmovaps 176(%rbp), %ymm10
+; CHECK-NEXT: vmovaps 144(%rbp), %ymm11
+; CHECK-NEXT: vmovaps 112(%rbp), %ymm12
+; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
+; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
+; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
+; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
+; CHECK-NEXT: vmovaps %ymm9, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: # implicit-def: $ymm0
+; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0
+; CHECK-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23]
+; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,0]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7]
; CHECK-NEXT: vmovaps %xmm7, %xmm2
-; CHECK-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
-; CHECK-NEXT: # implicit-def: $ymm2
-; CHECK-NEXT: vmovaps %xmm4, %xmm2
-; CHECK-NEXT: vpalignr {{.*#+}} ymm3 = ymm3[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm3[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,1,0,3]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm3 = ymm2[0,1,2,3],ymm3[4,5,6,7]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm7[0,1],ymm1[2,3],ymm7[4,5,6,7]
-; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,1,3]
-; CHECK-NEXT: vpshufd {{.*#+}} ymm2 = ymm5[0,1,0,1,4,5,4,5]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7]
-; CHECK-NEXT: vextracti128 $1, %ymm7, %xmm2
-; CHECK-NEXT: vmovq {{.*#+}} xmm4 = xmm2[0],zero
-; CHECK-NEXT: # implicit-def: $ymm2
-; CHECK-NEXT: vmovaps %xmm4, %xmm2
-; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm2[0,1],ymm6[0,1]
+; CHECK-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: # implicit-def: $ymm9
+; CHECK-NEXT: vmovaps %xmm2, %xmm9
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
+; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
+; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm11[4,5,6,7]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7]
+; CHECK-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,1,3]
+; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3,4,5],ymm5[6,7]
+; CHECK-NEXT: vextracti128 $1, %ymm7, %xmm7
+; CHECK-NEXT: vmovq {{.*#+}} xmm7 = xmm7[0],zero
+; CHECK-NEXT: # implicit-def: $ymm8
+; CHECK-NEXT: vmovaps %xmm7, %xmm8
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm8[0,1],ymm6[0,1]
+; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm5, %ymm1
+; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm9, %ymm3
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
diff --git a/llvm/test/CodeGen/X86/pr34653.ll b/llvm/test/CodeGen/X86/pr34653.ll
index f341a9a6c674..2f63ac311f2e 100644
--- a/llvm/test/CodeGen/X86/pr34653.ll
+++ b/llvm/test/CodeGen/X86/pr34653.ll
@@ -12,46 +12,52 @@ define void @pr34653() {
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-512, %rsp # imm = 0xFE00
-; CHECK-NEXT: subq $1024, %rsp # imm = 0x400
-; CHECK-NEXT: movq %rsp, %rdi
+; CHECK-NEXT: subq $1536, %rsp # imm = 0x600
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT: callq test
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm5 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm6 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm7 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm9 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm10 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm11 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm12 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm13 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm14 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm15 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm16 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm17 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm18 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm19 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm20 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm21 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm22 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm23 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm24 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm25 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm26 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm27 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm28 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm29 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm30 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm31 = mem[0],zero
+; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
diff --git a/llvm/test/CodeGen/X86/pr39733.ll b/llvm/test/CodeGen/X86/pr39733.ll
index 4a940806c9b7..31bd5b71d0a6 100644
--- a/llvm/test/CodeGen/X86/pr39733.ll
+++ b/llvm/test/CodeGen/X86/pr39733.ll
@@ -17,13 +17,13 @@ define void @test55() {
; CHECK-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1
-; CHECK-NEXT: vpmovsxwd %xmm1, %xmm2
-; CHECK-NEXT: # implicit-def: $ymm0
-; CHECK-NEXT: vmovaps %xmm2, %xmm0
-; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; CHECK-NEXT: vpmovsxwd %xmm1, %xmm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1
+; CHECK-NEXT: # implicit-def: $ymm2
+; CHECK-NEXT: vmovaps %xmm1, %xmm2
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; CHECK-NEXT: vmovdqa %ymm0, (%rsp)
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
diff --git a/llvm/test/CodeGen/X86/pr42452.ll b/llvm/test/CodeGen/X86/pr42452.ll
index 14a6f3d13300..d3a1dad42bd3 100644
--- a/llvm/test/CodeGen/X86/pr42452.ll
+++ b/llvm/test/CodeGen/X86/pr42452.ll
@@ -6,12 +6,12 @@
define void @foo(i1 %c, <2 x i64> %x) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movb %dil, %al
-; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT: # kill: def $dil killed $dil killed $edi
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; CHECK-NEXT: movq %xmm0, %rax
-; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq %xmm0, %rcx
+; CHECK-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
diff --git a/llvm/test/CodeGen/X86/pr44749.ll b/llvm/test/CodeGen/X86/pr44749.ll
index daf7e25884a4..1012d8c723b1 100644
--- a/llvm/test/CodeGen/X86/pr44749.ll
+++ b/llvm/test/CodeGen/X86/pr44749.ll
@@ -4,29 +4,33 @@
define i32 @a() {
; CHECK-LABEL: a:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: callq _b
; CHECK-NEXT: cvtsi2sd %eax, %xmm0
; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rax
; CHECK-NEXT: subq $-1, %rax
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: leaq {{.*}}(%rip), %rax
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: ## kill: def $rcx killed $ecx
+; CHECK-NEXT: leaq {{.*}}(%rip), %rdx
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: ucomisd %xmm1, %xmm0
-; CHECK-NEXT: setae %al
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: leaq {{.*}}(%rip), %rax
+; CHECK-NEXT: setae %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: ## kill: def $rcx killed $ecx
+; CHECK-NEXT: leaq {{.*}}(%rip), %rdx
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: cvttsd2si %xmm0, %eax
-; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: cvttsd2si %xmm0, %ecx
+; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
entry:
%call = call i32 (...) @b()
diff --git a/llvm/test/CodeGen/X86/pr47000.ll b/llvm/test/CodeGen/X86/pr47000.ll
index c2d9317a95ea..083aa780a07c 100755
--- a/llvm/test/CodeGen/X86/pr47000.ll
+++ b/llvm/test/CodeGen/X86/pr47000.ll
@@ -12,124 +12,124 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl $124, %esp
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl 144(%esp), %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: movw 176(%esp), %dx
+; CHECK-NEXT: movw 172(%esp), %si
+; CHECK-NEXT: movw 168(%esp), %di
+; CHECK-NEXT: movw 164(%esp), %bx
+; CHECK-NEXT: movw 160(%esp), %bp
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movw {{[0-9]+}}(%esp), %si
-; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx
-; CHECK-NEXT: movw {{[0-9]+}}(%esp), %cx
-; CHECK-NEXT: movw {{[0-9]+}}(%esp), %ax
+; CHECK-NEXT: movw 156(%esp), %ax
+; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; CHECK-NEXT: movw 152(%esp), %ax
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT: movw {{[0-9]+}}(%esp), %di
-; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bx
-; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bp
-; CHECK-NEXT: movw {{[0-9]+}}(%esp), %ax
-; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movw 148(%esp), %ax
+; CHECK-NEXT: movw %ax, 112(%esp)
; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
-; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movw %bx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movw %di, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movw %si, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movw %ax, 114(%esp)
+; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
+; CHECK-NEXT: movw %ax, 116(%esp)
+; CHECK-NEXT: movw %bp, 118(%esp)
+; CHECK-NEXT: movw %dx, 110(%esp)
+; CHECK-NEXT: movw %si, 108(%esp)
+; CHECK-NEXT: movw %di, 106(%esp)
+; CHECK-NEXT: movw %bx, 104(%esp)
+; CHECK-NEXT: movzwl 118(%esp), %edx
+; CHECK-NEXT: movzwl 116(%esp), %esi
+; CHECK-NEXT: movzwl 114(%esp), %edi
+; CHECK-NEXT: movzwl 112(%esp), %ebx
+; CHECK-NEXT: movzwl 110(%esp), %ebp
+; CHECK-NEXT: movzwl 108(%esp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzwl 106(%esp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzwl 104(%esp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: movl %ecx, (%eax)
+; CHECK-NEXT: movl %ebx, (%eax)
+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: movl %esp, %eax
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, (%eax)
+; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
+; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __gnu_f2h_ieee
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT: movl %edx, (%ecx)
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: movl %ecx, (%eax)
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: movl %esp, %eax
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, (%eax)
+; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
+; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __gnu_f2h_ieee
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: movw %ax, %si
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: movl %ecx, (%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT: movl %edx, (%ecx)
+; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: movl %esp, %eax
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, (%eax)
+; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
+; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __gnu_f2h_ieee
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: movw %ax, %di
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: movl %ecx, (%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT: movl %edx, (%ecx)
+; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: movl %esp, %eax
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, (%eax)
+; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
+; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __gnu_f2h_ieee
-; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: movw %ax, %bx
+; CHECK-NEXT: movw %ax, 6(%ecx)
+; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
+; CHECK-NEXT: movw %ax, 4(%ecx)
+; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload
+; CHECK-NEXT: movw %dx, 2(%ecx)
+; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %si # 2-byte Reload
+; CHECK-NEXT: movw %si, (%ecx)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; CHECK-NEXT: movw %bx, 6(%ecx)
-; CHECK-NEXT: movw %di, 4(%ecx)
-; CHECK-NEXT: movw %si, 2(%ecx)
-; CHECK-NEXT: movw %dx, (%ecx)
; CHECK-NEXT: addl $124, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir
index f6b4536cbbc4..2821f00940ec 100644
--- a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir
+++ b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir
@@ -4,7 +4,7 @@
# Bug 41973. Make sure %12 is detected as live out of %bb.0, even
# though the use is allocated before the def block %bb.3. Previously
# mayLiveOut only recorded on defs, and would not find the virtual
-# register use if it had already been replaced with a physical
+# register use if it had already been replace with a physical
# register.
---
@@ -21,11 +21,11 @@ body: |
; CHECK: successors:
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: $rcx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1)
- ; CHECK: renamable $eax = MOV32r0 implicit-def dead $eflags
- ; CHECK: renamable $rax = SUBREG_TO_REG 0, killed renamable $eax, %subreg.sub_32bit
- ; CHECK: MOV64mi32 killed renamable $rcx, 1, $noreg, 0, $noreg, 0 :: (volatile store 8)
- ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rax :: (store 8 into %stack.0)
+ ; CHECK: $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1)
+ ; CHECK: renamable $ecx = MOV32r0 implicit-def $eflags
+ ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit
+ ; CHECK: MOV64mi32 killed renamable $rax, 1, $noreg, 0, $noreg, 0 :: (volatile store 8)
+ ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.0)
; CHECK: bb.3:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)
@@ -46,6 +46,7 @@ body: |
bb.1:
successors:
+
bb.2:
%0:gr64 = COPY %12
%10:gr32 = MOV32r0 implicit-def $eflags
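(The test comment at the top of this file references the mayLiveOut behaviour from Bug 41973: a virtual register must be treated as live out of a block when one of its uses sits in a block allocated before the defining block. The sketch below is a minimal, hypothetical C++ illustration of that kind of check; the names mayLiveOut, VirtRegInfo, and MayLiveAcrossBlocks are simplified stand-ins and are not the actual interfaces in llvm/lib/CodeGen/RegAllocFast.cpp.)

    #include <set>
    #include <vector>

    // Simplified stand-ins for the allocator's bookkeeping types.
    struct MachineInstr { int Parent; };                  // index of the containing basic block
    struct VirtRegInfo  { std::vector<MachineInstr*> Users; };

    // Returns true if VRegId has a user outside CurBlock. Recording this on
    // uses as well as defs (before a use is rewritten to a physical register)
    // is the point the test comment alludes to.
    bool mayLiveOut(const VirtRegInfo &VReg, int CurBlock, int VRegId,
                    std::set<int> &MayLiveAcrossBlocks) {
      if (MayLiveAcrossBlocks.count(VRegId))
        return true;
      for (const MachineInstr *UseMI : VReg.Users) {
        if (UseMI->Parent != CurBlock) {
          // Remember the register so it gets spilled at the end of the block.
          MayLiveAcrossBlocks.insert(VRegId);
          return true;
        }
      }
      return false;
    }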
diff --git a/llvm/test/CodeGen/X86/stack-protector-msvc.ll b/llvm/test/CodeGen/X86/stack-protector-msvc.ll
index 7679bb4f77ca..c1f79f9db2f6 100644
--- a/llvm/test/CodeGen/X86/stack-protector-msvc.ll
+++ b/llvm/test/CodeGen/X86/stack-protector-msvc.ll
@@ -48,8 +48,9 @@ return: ; preds = %entry
; MSVC-X86-O0: xorl %esp, %[[REG1]]
; MSVC-X86-O0: movl %[[REG1]], [[SLOT:[0-9]*]](%esp)
; MSVC-X86-O0: calll _strcpy
-; MSVC-X86-O0: movl [[SLOT]](%esp), %ecx
-; MSVC-X86-O0: xorl %esp, %ecx
+; MSVC-X86-O0: movl [[SLOT]](%esp), %[[REG1:[^ ]*]]
+; MSVC-X86-O0: xorl %esp, %[[REG1]]
+; MSVC-X86-O0: movl %[[REG1]], %ecx
; MSVC-X86-O0: calll @__security_check_cookie at 4
; MSVC-X86-O0: retl
@@ -58,8 +59,9 @@ return: ; preds = %entry
; MSVC-X64-O0: xorq %rsp, %[[REG1]]
; MSVC-X64-O0: movq %[[REG1]], [[SLOT:[0-9]*]](%rsp)
; MSVC-X64-O0: callq strcpy
-; MSVC-X64-O0: movq [[SLOT]](%rsp), %rcx
-; MSVC-X64-O0: xorq %rsp, %rcx
+; MSVC-X64-O0: movq [[SLOT]](%rsp), %[[REG1:[^ ]*]]
+; MSVC-X64-O0: xorq %rsp, %[[REG1]]
+; MSVC-X64-O0: movq %[[REG1]], %rcx
; MSVC-X64-O0: callq __security_check_cookie
; MSVC-X64-O0: retq
diff --git a/llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll b/llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll
index ccc4b34ae930..f5647c341e73 100644
--- a/llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll
+++ b/llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll
@@ -14,7 +14,7 @@ define dso_local i32 @main(i32 %argc, i8** %argv, ...) #0 {
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
-; CHECK-NEXT: subq $320, %rsp ## imm = 0x140
+; CHECK-NEXT: subq $336, %rsp ## imm = 0x150
; CHECK-NEXT: movq ___security_cookie@{{.*}}(%rip), %rax
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: movq %rax, -8(%rbp)
@@ -25,9 +25,10 @@ define dso_local i32 @main(i32 %argc, i8** %argv, ...) #0 {
; CHECK-NEXT: leaq {{.*}}(%rip), %rcx
; CHECK-NEXT: callq _printf
; CHECK-NEXT: movq -8(%rbp), %rcx
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: callq ___security_check_cookie
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: addq $320, %rsp ## imm = 0x140
+; CHECK-NEXT: addq $336, %rsp ## imm = 0x150
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/swift-return.ll b/llvm/test/CodeGen/X86/swift-return.ll
index 11312f08edfa..4934419055ac 100644
--- a/llvm/test/CodeGen/X86/swift-return.ll
+++ b/llvm/test/CodeGen/X86/swift-return.ll
@@ -79,15 +79,16 @@ define i32 @test2(i32 %key) #0 {
; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-O0-NEXT: movq %rsp, %rax
; CHECK-O0-NEXT: callq gen2
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi
-; CHECK-O0-NEXT: movl (%rsp), %eax
+; CHECK-O0-NEXT: movl (%rsp), %esi
; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi
-; CHECK-O0-NEXT: addl %edi, %eax
-; CHECK-O0-NEXT: addl %esi, %eax
-; CHECK-O0-NEXT: addl %edx, %eax
-; CHECK-O0-NEXT: addl %ecx, %eax
+; CHECK-O0-NEXT: addl %edi, %esi
+; CHECK-O0-NEXT: addl %edx, %esi
+; CHECK-O0-NEXT: addl %ecx, %esi
+; CHECK-O0-NEXT: addl %eax, %esi
+; CHECK-O0-NEXT: movl %esi, %eax
; CHECK-O0-NEXT: addq $24, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
@@ -417,10 +418,10 @@ define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) {
;
; CHECK-O0-LABEL: gen7:
; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movl %edi, %eax
+; CHECK-O0-NEXT: movl %edi, %edx
+; CHECK-O0-NEXT: movl %edi, %ecx
; CHECK-O0-NEXT: movl %edi, %r8d
-; CHECK-O0-NEXT: movl %r8d, %eax
-; CHECK-O0-NEXT: movl %r8d, %edx
-; CHECK-O0-NEXT: movl %r8d, %ecx
; CHECK-O0-NEXT: retq
%v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0
%v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1
@@ -440,10 +441,10 @@ define swiftcc { i64, i64, i64, i64 } @gen8(i64 %key) {
;
; CHECK-O0-LABEL: gen8:
; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movq %rdi, %rax
+; CHECK-O0-NEXT: movq %rdi, %rdx
+; CHECK-O0-NEXT: movq %rdi, %rcx
; CHECK-O0-NEXT: movq %rdi, %r8
-; CHECK-O0-NEXT: movq %r8, %rax
-; CHECK-O0-NEXT: movq %r8, %rdx
-; CHECK-O0-NEXT: movq %r8, %rcx
; CHECK-O0-NEXT: retq
%v0 = insertvalue { i64, i64, i64, i64 } undef, i64 %key, 0
%v1 = insertvalue { i64, i64, i64, i64 } %v0, i64 %key, 1
@@ -463,10 +464,11 @@ define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) {
;
; CHECK-O0-LABEL: gen9:
; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: # kill: def $dil killed $dil killed $edi
+; CHECK-O0-NEXT: movb %dil, %al
+; CHECK-O0-NEXT: movb %dil, %dl
+; CHECK-O0-NEXT: movb %dil, %cl
; CHECK-O0-NEXT: movb %dil, %r8b
-; CHECK-O0-NEXT: movb %r8b, %al
-; CHECK-O0-NEXT: movb %r8b, %dl
-; CHECK-O0-NEXT: movb %r8b, %cl
; CHECK-O0-NEXT: retq
%v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0
%v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1
@@ -488,14 +490,17 @@ define swiftcc { double, double, double, double, i64, i64, i64, i64 } @gen10(dou
;
; CHECK-O0-LABEL: gen10:
; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-O0-NEXT: # xmm1 = mem[0],zero
+; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 8-byte Reload
+; CHECK-O0-NEXT: # xmm2 = mem[0],zero
+; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 8-byte Reload
+; CHECK-O0-NEXT: # xmm3 = mem[0],zero
+; CHECK-O0-NEXT: movq %rdi, %rax
+; CHECK-O0-NEXT: movq %rdi, %rdx
+; CHECK-O0-NEXT: movq %rdi, %rcx
; CHECK-O0-NEXT: movq %rdi, %r8
-; CHECK-O0-NEXT: movaps %xmm0, %xmm3
-; CHECK-O0-NEXT: movaps %xmm3, %xmm0
-; CHECK-O0-NEXT: movaps %xmm3, %xmm1
-; CHECK-O0-NEXT: movaps %xmm3, %xmm2
-; CHECK-O0-NEXT: movq %r8, %rax
-; CHECK-O0-NEXT: movq %r8, %rdx
-; CHECK-O0-NEXT: movq %r8, %rcx
; CHECK-O0-NEXT: retq
%v0 = insertvalue { double, double, double, double, i64, i64, i64, i64 } undef, double %keyd, 0
%v1 = insertvalue { double, double, double, double, i64, i64, i64, i64 } %v0, double %keyd, 1
@@ -564,15 +569,13 @@ define swiftcc { <4 x float>, float } @test12() #0 {
;
; CHECK-O0-LABEL: test12:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: subq $24, %rsp
-; CHECK-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-O0-NEXT: pushq %rax
+; CHECK-O0-NEXT: .cfi_def_cfa_offset 16
; CHECK-O0-NEXT: callq gen12
-; CHECK-O0-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
-; CHECK-O0-NEXT: movaps %xmm3, %xmm1
-; CHECK-O0-NEXT: movaps (%rsp), %xmm3 # 16-byte Reload
-; CHECK-O0-NEXT: addps %xmm3, %xmm0
+; CHECK-O0-NEXT: addps %xmm1, %xmm0
; CHECK-O0-NEXT: addps %xmm2, %xmm0
-; CHECK-O0-NEXT: addq $24, %rsp
+; CHECK-O0-NEXT: movaps %xmm3, %xmm1
+; CHECK-O0-NEXT: popq %rax
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll
index 8877cd410859..1afae31b2b8d 100644
--- a/llvm/test/CodeGen/X86/swifterror.ll
+++ b/llvm/test/CodeGen/X86/swifterror.ll
@@ -18,9 +18,8 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) {
; CHECK-O0-LABEL: foo:
; CHECK-O0: movl $16
; CHECK-O0: malloc
-; CHECK-O0: movq %{{.*}}, %r12
; CHECK-O0: movb $1, 8(%rax)
-
+; CHECK-O0: movq %{{.*}}, %r12
entry:
%call = call i8* @malloc(i64 16)
%call.0 = bitcast i8* %call to %swift_error*
@@ -122,17 +121,19 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
; CHECK-APPLE: ret
; CHECK-O0-LABEL: foo_if:
+; CHECK-O0: cmpl $0
; spill to stack
; CHECK-O0: movq %r12, {{.*}}(%rsp)
-; CHECK-O0: cmpl $0
; CHECK-O0: je
; CHECK-O0: movl $16,
; CHECK-O0: malloc
-; CHECK-O0: movq %rax, %r12
+; CHECK-O0: movq %rax, [[ID:%[a-z]+]]
; CHECK-O0-DAG: movb $1, 8(%rax)
+; CHECK-O0-DAG: movq [[ID]], %r12
; CHECK-O0: ret
; reload from stack
-; CHECK-O0: movq {{.*}}(%rsp), %r12
+; CHECK-O0: movq {{.*}}(%rsp), [[REG:%[a-z]+]]
+; CHECK-O0: movq [[REG]], %r12
; CHECK-O0: ret
entry:
%cond = icmp ne i32 %cc, 0
@@ -176,7 +177,8 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
; CHECK-O0: movb $1, 8([[ID]])
; CHECK-O0: jbe
; reload from stack
-; CHECK-O0: movq {{.*}}(%rsp), %r12
+; CHECK-O0: movq {{.*}}(%rsp), [[REG:%[a-z0-9]+]]
+; CHECK-O0: movq [[REG]], %r12
; CHECK-O0: ret
entry:
br label %bb_loop
@@ -216,15 +218,16 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi
; CHECK-APPLE-NOT: x19
; CHECK-O0-LABEL: foo_sret:
+; CHECK-O0: movl $16,
; spill sret to stack
; CHECK-O0: movq %rdi,
-; CHECK-O0: movl $16,
+; CHECK-O0: movq {{.*}}, %rdi
; CHECK-O0: malloc
+; CHECK-O0: movb $1, 8(%rax)
+; CHECK-O0: movl %{{.*}}, 4(%{{.*}})
+; CHECK-O0: movq %{{.*}}, %r12
; reload sret from stack
; CHECK-O0: movq {{.*}}(%rsp), %rax
-; CHECK-O0: movq %{{.*}}, %r12
-; CHECK-O0: movb $1, 8(%rcx)
-; CHECK-O0: movl %{{.*}}, 4(%{{.*}})
; CHECK-O0: ret
entry:
%call = call i8* @malloc(i64 16)
@@ -253,8 +256,8 @@ define float @caller3(i8* %error_ref) {
; CHECK-O0-LABEL: caller3:
; CHECK-O0: xorl
; CHECK-O0: movl {{.*}}, %r12d
-; CHECK-O0: leaq {{.*}}, %rdi
; CHECK-O0: movl $1, %esi
+; CHECK-O0: movq {{.*}}, %rdi
; CHECK-O0: callq {{.*}}foo_sret
; CHECK-O0: movq %r12,
; CHECK-O0: cmpq $0
@@ -384,9 +387,8 @@ define swiftcc float @foo_swiftcc(%swift_error** swifterror %error_ptr_ref) {
; CHECK-O0-LABEL: foo_swiftcc:
; CHECK-O0: movl $16
; CHECK-O0: malloc
-; CHECK-O0: movq %{{.*}}, %r12
; CHECK-O0: movb $1, 8(%rax)
-
+; CHECK-O0: movq %{{.*}}, %r12
entry:
%call = call i8* @malloc(i64 16)
%call.0 = bitcast i8* %call to %swift_error*
@@ -433,17 +435,19 @@ define swiftcc float @conditionally_forward_swifterror(%swift_error** swifterror
; CHECK-O0-LABEL: conditionally_forward_swifterror:
; CHECK-O0: pushq [[REG1:%[a-z0-9]+]]
-; CHECK-O0-DAG: movq %r12, (%rsp)
; CHECK-O0: cmpl $0, %edi
+; CHECK-O0-DAG: movq %r12, (%rsp)
; CHECK-O0: je
-; CHECK-O0: movq (%rsp), %r12
+; CHECK-O0: movq (%rsp), [[REG:%[a-z0-9]+]]
+; CHECK-O0: movq [[REG]], %r12
; CHECK-O0: callq _moo
; CHECK-O0: popq [[REG1]]
; CHECK-O0: retq
-; CHECK-O0: movq (%rsp), %r12
+; CHECK-O0: movq (%rsp), [[REG:%[a-z0-9]+]]
; CHECK-O0: xorps %xmm0, %xmm0
+; CHECK-O0: movq [[REG]], %r12
; CHECK-O0: popq [[REG1]]
; CHECK-O0: retq
entry:
@@ -741,9 +745,10 @@ a:
; CHECK-O0-LABEL: testAssign2
; CHECK-O0: movq %r12, [[SLOT:[-a-z0-9\(\)\%]*]]
; CHECK-O0: jmp
-; CHECK-O0: movq [[SLOT]], %r12
-; CHECK-O0-NEXT: movq %r12, %rax
-; CHECK-O0-NEXT: retq
+; CHECK-O0: movq [[SLOT]], %rax
+; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]]
+; CHECK-O0: movq [[SLOT2]], %r12
+; CHECK-O0: retq
; CHECK-APPLE-LABEL: testAssign2
; CHECK-APPLE: movq %r12, %rax
@@ -760,10 +765,11 @@ a:
; CHECK-O0-LABEL: testAssign3
; CHECK-O0: callq _foo2
; CHECK-O0: movq %r12, [[SLOT:[-a-z0-9\(\)\%]*]]
-; CHECK-O0: movq [[SLOT]], %r12
-; CHECK-O0-NEXT: movq %r12, %rax
-; CHECK-O0-NEXT: popq %rcx
-; CHECK-O0-NEXT: retq
+; CHECK-O0: movq [[SLOT]], %rax
+; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]]
+; CHECK-O0: movq [[SLOT2]], %r12
+; CHECK-O0: addq $24, %rsp
+; CHECK-O0: retq
; CHECK-APPLE-LABEL: testAssign3
; CHECK-APPLE: callq _foo2
@@ -786,10 +792,10 @@ a:
; CHECK-O0: xorl %eax, %eax
; CHECK-O0: ## kill: def $rax killed $eax
; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]]
-; CHECK-O0: movq [[SLOT]], %r12
-; CHECK-O0-NEXT: movq %r12, %rax
-; CHECK-O0-NEXT: popq %rcx
-; CHECK-O0-NEXT: retq
+; CHECK-O0: movq [[SLOT]], %rax
+; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]]
+; CHECK-O0: movq [[SLOT2]], %r12
+; CHECK-O0: retq
; CHECK-APPLE-LABEL: testAssign4
; CHECK-APPLE: callq _foo2
diff --git a/llvm/test/CodeGen/X86/volatile.ll b/llvm/test/CodeGen/X86/volatile.ll
index a4f52dd4ca0a..d6ed45a1909f 100644
--- a/llvm/test/CodeGen/X86/volatile.ll
+++ b/llvm/test/CodeGen/X86/volatile.ll
@@ -5,14 +5,23 @@
@x = external global double
define void @foo() nounwind {
-; ALL-LABEL: foo:
-; ALL: # %bb.0:
-; ALL-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: xorps %xmm0, %xmm0
-; ALL-NEXT: movsd %xmm0, x
-; ALL-NEXT: movsd %xmm0, x
-; ALL-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: retl
+; OPT-LABEL: foo:
+; OPT: # %bb.0:
+; OPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; OPT-NEXT: xorps %xmm0, %xmm0
+; OPT-NEXT: movsd %xmm0, x
+; OPT-NEXT: movsd %xmm0, x
+; OPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; OPT-NEXT: retl
+;
+; NOOPT-LABEL: foo:
+; NOOPT: # %bb.0:
+; NOOPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NOOPT-NEXT: xorps %xmm1, %xmm1
+; NOOPT-NEXT: movsd %xmm1, x
+; NOOPT-NEXT: movsd %xmm1, x
+; NOOPT-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NOOPT-NEXT: retl
%a = load volatile double, double* @x
store volatile double 0.0, double* @x
store volatile double 0.0, double* @x
diff --git a/llvm/test/CodeGen/X86/win64_eh.ll b/llvm/test/CodeGen/X86/win64_eh.ll
index ea795906a94c..caadea4fe2e4 100644
--- a/llvm/test/CodeGen/X86/win64_eh.ll
+++ b/llvm/test/CodeGen/X86/win64_eh.ll
@@ -82,11 +82,11 @@ entry:
}
; WIN64-LABEL: foo3:
; WIN64: .seh_proc foo3
-; NORM: subq $16, %rsp
-; ATOM: leaq -16(%rsp), %rsp
-; WIN64: .seh_stackalloc 16
+; NORM: subq $24, %rsp
+; ATOM: leaq -24(%rsp), %rsp
+; WIN64: .seh_stackalloc 24
; WIN64: .seh_endprologue
-; WIN64: addq $16, %rsp
+; WIN64: addq $24, %rsp
; WIN64: ret
; WIN64: .seh_endproc
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index be325e2f0edc..af2d96577213 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -42,9 +42,9 @@ define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i32 %eco
; CHECK0-LABEL: test_isr_ecode
; CHECK0: pushl %ecx
; CHECK0: pushl %eax
- ; CHECK0: movl 8(%esp), %ecx
- ; CHECK0: leal 12(%esp), %eax
- ; CHECK0: movl 8(%eax), %eax
+ ; CHECK0: movl 8(%esp), %eax
+ ; CHECK0: leal 12(%esp), %ecx
+ ; CHECK0: movl 8(%ecx), %ecx
; CHECK0: popl %eax
; CHECK0: popl %ecx
; CHECK0: addl $4, %esp
diff --git a/llvm/test/CodeGen/X86/x86-64-intrcc.ll b/llvm/test/CodeGen/X86/x86-64-intrcc.ll
index 548f7100b028..866108e3cd3c 100644
--- a/llvm/test/CodeGen/X86/x86-64-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-64-intrcc.ll
@@ -43,9 +43,9 @@ define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i64 %eco
; CHECK0: pushq %rax
; CHECK0: pushq %rax
; CHECK0: pushq %rcx
- ; CHECK0: movq 24(%rsp), %rcx
- ; CHECK0: leaq 32(%rsp), %rax
- ; CHECK0: movq 16(%rax), %rax
+ ; CHECK0: movq 24(%rsp), %rax
+ ; CHECK0: leaq 32(%rsp), %rcx
+ ; CHECK0: movq 16(%rcx), %rcx
; CHECK0: popq %rcx
; CHECK0: popq %rax
; CHECK0: addq $16, %rsp
diff --git a/llvm/test/DebugInfo/AArch64/frameindices.ll b/llvm/test/DebugInfo/AArch64/frameindices.ll
index b53fbf6fd088..a74e6bad3058 100644
--- a/llvm/test/DebugInfo/AArch64/frameindices.ll
+++ b/llvm/test/DebugInfo/AArch64/frameindices.ll
@@ -5,7 +5,7 @@
; CHECK: DW_TAG_inlined_subroutine
; CHECK: "_Z3f111A"
; CHECK: DW_TAG_formal_parameter
-; CHECK: DW_AT_location [DW_FORM_block1] (DW_OP_piece 0x1, DW_OP_fbreg -47, DW_OP_piece 0xf, DW_OP_piece 0x1, DW_OP_breg31 WSP+42, DW_OP_piece 0x7)
+; CHECK: DW_AT_location [DW_FORM_block1] (DW_OP_piece 0x1, DW_OP_fbreg -47, DW_OP_piece 0xf, DW_OP_piece 0x1, DW_OP_fbreg -54, DW_OP_piece 0x7)
; CHECK: DW_AT_abstract_origin {{.*}} "p1"
;
; long a;
diff --git a/llvm/test/DebugInfo/AArch64/prologue_end.ll b/llvm/test/DebugInfo/AArch64/prologue_end.ll
index 660ce3aa43ab..bafbcf752aa4 100644
--- a/llvm/test/DebugInfo/AArch64/prologue_end.ll
+++ b/llvm/test/DebugInfo/AArch64/prologue_end.ll
@@ -9,8 +9,9 @@
define void @prologue_end_test() nounwind uwtable !dbg !4 {
; CHECK: prologue_end_test:
; CHECK: .cfi_startproc
+ ; CHECK: sub sp, sp
; CHECK: stp x29, x30
- ; CHECK: mov x29, sp
+ ; CHECK: add x29, sp
; CHECK: .loc 1 3 3 prologue_end
; CHECK: bl _func
; CHECK: bl _func
diff --git a/llvm/test/DebugInfo/ARM/prologue_end.ll b/llvm/test/DebugInfo/ARM/prologue_end.ll
index 5b50448ad80f..2c4922d1a8a4 100644
--- a/llvm/test/DebugInfo/ARM/prologue_end.ll
+++ b/llvm/test/DebugInfo/ARM/prologue_end.ll
@@ -11,6 +11,7 @@ define void @prologue_end_test() nounwind uwtable !dbg !4 {
; CHECK: prologue_end_test:
; CHECK: push {r7, lr}
; CHECK: {{mov r7, sp|add r7, sp}}
+ ; CHECK: sub sp
; CHECK: .loc 1 3 3 prologue_end
; CHECK: bl {{_func|Ltmp}}
; CHECK: bl {{_func|Ltmp}}
diff --git a/llvm/test/DebugInfo/Mips/delay-slot.ll b/llvm/test/DebugInfo/Mips/delay-slot.ll
index 07c2caa8999c..8f444bce30fd 100644
--- a/llvm/test/DebugInfo/Mips/delay-slot.ll
+++ b/llvm/test/DebugInfo/Mips/delay-slot.ll
@@ -14,10 +14,10 @@
; CHECK: ------------------ ------ ------ ------ --- ------------- -------------
; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt
; CHECK: 0x0000000000000004 2 0 1 0 0 is_stmt prologue_end
-; CHECK: 0x0000000000000020 3 0 1 0 0 is_stmt
-; CHECK: 0x0000000000000030 4 0 1 0 0 is_stmt
+; CHECK: 0x0000000000000024 3 0 1 0 0 is_stmt
+; CHECK: 0x0000000000000034 4 0 1 0 0 is_stmt
; CHECK: 0x0000000000000048 5 0 1 0 0 is_stmt
-; CHECK: 0x0000000000000050 5 0 1 0 0 is_stmt end_sequence
+; CHECK: 0x0000000000000058 5 0 1 0 0 is_stmt end_sequence
target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
diff --git a/llvm/test/DebugInfo/Mips/prologue_end.ll b/llvm/test/DebugInfo/Mips/prologue_end.ll
index de907cdc651f..7886b9b0485f 100644
--- a/llvm/test/DebugInfo/Mips/prologue_end.ll
+++ b/llvm/test/DebugInfo/Mips/prologue_end.ll
@@ -30,7 +30,7 @@ entry:
; PIC: addiu $[[R0]], $[[R0]], %lo(_gp_disp)
; PIC: addiu $sp, $sp, -{{[0-9]+}}
; PIC: sw $ra, {{[0-9]+}}($sp)
-; PIC: addu $[[R1:[0-9]+|gp]], $[[R0]], $25
+; PIC: addu $[[R1:[0-9]+]], $[[R0]], $25
; PIC: .loc 1 2 3 prologue_end
; PIC: lw $[[R2:[0-9]+]], %got($.str)($[[R1]])
@@ -40,7 +40,7 @@ entry:
; PIC-FP: sw $ra, {{[0-9]+}}($sp)
; PIC-FP: sw $fp, {{[0-9]+}}($sp)
; PIC-FP: move $fp, $sp
-; PIC-FP: addu $[[R1:[0-9]+|gp]], $[[R0]], $25
+; PIC-FP: addu $[[R1:[0-9]+]], $[[R0]], $25
; PIC-FP: .loc 1 2 3 prologue_end
; PIC-FP: lw $[[R2:[0-9]+]], %got($.str)($[[R1]])
diff --git a/llvm/test/DebugInfo/X86/dbg-declare-arg.ll b/llvm/test/DebugInfo/X86/dbg-declare-arg.ll
index b2b88cb8b1b8..1fa53462b840 100644
--- a/llvm/test/DebugInfo/X86/dbg-declare-arg.ll
+++ b/llvm/test/DebugInfo/X86/dbg-declare-arg.ll
@@ -20,7 +20,7 @@ target triple = "x86_64-apple-macosx10.6.7"
; CHECK: DW_AT_name {{.*}}"j"
; CHECK: DW_TAG_variable
; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] (
-; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): DW_OP_breg7 RSP+8, DW_OP_deref)
+; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): DW_OP_breg7 RSP+16, DW_OP_deref)
; CHECK-NEXT: DW_AT_name {{.*}}"my_a"
%class.A = type { i32, i32, i32, i32 }
diff --git a/llvm/test/DebugInfo/X86/fission-ranges.ll b/llvm/test/DebugInfo/X86/fission-ranges.ll
index 8174cabe2932..e8d8bd86f2fe 100644
--- a/llvm/test/DebugInfo/X86/fission-ranges.ll
+++ b/llvm/test/DebugInfo/X86/fission-ranges.ll
@@ -10,11 +10,11 @@
; LiveDebugValues should produce DBG_VALUEs for variable "b" in successive
; blocks once we recognize that it is spilled.
; CHECK-MIR: ![[BDIVAR:[0-9]+]] = !DILocalVariable(name: "b"
-; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 24, DW_OP_minus)
+; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus)
; CHECK-MIR-LABEL: bb.6.for.inc13:
-; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 24, DW_OP_minus)
+; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus)
; CHECK-MIR-LABEL: bb.7.for.inc16:
-; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 24, DW_OP_minus)
+; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus)
; CHECK: .debug_info contents:
@@ -46,20 +46,20 @@
; CHECK: [[A]]:
; CHECK-NEXT: DW_LLE_startx_length (0x00000002, 0x0000000f): DW_OP_consts +0, DW_OP_stack_value
-; CHECK-NEXT: DW_LLE_startx_length (0x00000003, 0x0000000b): DW_OP_reg0 RAX
-; CHECK-NEXT: DW_LLE_startx_length (0x00000004, 0x00000012): DW_OP_breg7 RSP-4
+; CHECK-NEXT: DW_LLE_startx_length (0x00000003, 0x0000000f): DW_OP_reg0 RAX
+; CHECK-NEXT: DW_LLE_startx_length (0x00000004, 0x00000012): DW_OP_breg7 RSP-8
; CHECK-NEXT: DW_LLE_end_of_list ()
; CHECK: [[E]]:
-; CHECK-NEXT: DW_LLE_startx_length (0x00000005, 0x0000000b): DW_OP_reg0 RAX
-; CHECK-NEXT: DW_LLE_startx_length (0x00000006, 0x0000005a): DW_OP_breg7 RSP-36
+; CHECK-NEXT: DW_LLE_startx_length (0x00000005, 0x00000009): DW_OP_reg0 RAX
+; CHECK-NEXT: DW_LLE_startx_length (0x00000006, 0x00000062): DW_OP_breg7 RSP-44
; CHECK-NEXT: DW_LLE_end_of_list ()
; CHECK: [[B]]:
-; CHECK-NEXT: DW_LLE_startx_length (0x00000007, 0x0000000b): DW_OP_reg0 RAX
-; CHECK-NEXT: DW_LLE_startx_length (0x00000008, 0x00000042): DW_OP_breg7 RSP-24
+; CHECK-NEXT: DW_LLE_startx_length (0x00000007, 0x0000000f): DW_OP_reg0 RAX
+; CHECK-NEXT: DW_LLE_startx_length (0x00000008, 0x00000042): DW_OP_breg7 RSP-32
; CHECK-NEXT: DW_LLE_end_of_list ()
; CHECK: [[D]]:
-; CHECK-NEXT: DW_LLE_startx_length (0x00000009, 0x0000000b): DW_OP_reg0 RAX
-; CHECK-NEXT: DW_LLE_startx_length (0x0000000a, 0x0000002a): DW_OP_breg7 RSP-12
+; CHECK-NEXT: DW_LLE_startx_length (0x00000009, 0x0000000f): DW_OP_reg0 RAX
+; CHECK-NEXT: DW_LLE_startx_length (0x0000000a, 0x0000002a): DW_OP_breg7 RSP-20
; CHECK-NEXT: DW_LLE_end_of_list ()
; Make sure we don't produce any relocations in any .dwo section (though in particular, debug_info.dwo)
@@ -81,7 +81,7 @@
; V5RNGLISTS-NOT: DW_TAG
; V5RNGLISTS: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c)
; V5RNGLISTS: .debug_rnglists contents:
-; V5RNGLISTS-NEXT: 0x00000000: range list header: length = 0x00000015, format = DWARF32, version = 0x0005,
+; V5RNGLISTS-NEXT: 0x00000000: range list header: length = 0x00000019, format = DWARF32, version = 0x0005,
; V5RNGLISTS-SAME: addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000001
; V5RNGLISTS-NEXT: offsets: [
; V5RNGLISTS-NEXT: => 0x00000010
@@ -96,7 +96,7 @@
; extern int c;
; static void foo (int p)
; {
-; int a, b;
+; int a, b;
; unsigned int d, e;
; for (a = 0; a < 30; a++)
@@ -104,12 +104,12 @@
; for (b = 0; b < 30; b++)
; for (e = 0; e < 30; e++)
; {
-; int *w = &c;
-; *w &= p;
+; int *w = &c;
+; *w &= p;
; }
; }
-; void
+; void
; bar ()
; {
; foo (1);
diff --git a/llvm/test/DebugInfo/X86/op_deref.ll b/llvm/test/DebugInfo/X86/op_deref.ll
index e357d3c9b02e..1b49dc554f7e 100644
--- a/llvm/test/DebugInfo/X86/op_deref.ll
+++ b/llvm/test/DebugInfo/X86/op_deref.ll
@@ -6,12 +6,10 @@
; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3
; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000
-; DWARF4-NEXT: {{.*}}: DW_OP_breg6 RBP-40, DW_OP_deref, DW_OP_deref
-; DWARF4-NEXT: {{.*}}: DW_OP_breg0 RAX+0, DW_OP_deref)
+; DWARF4-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref
; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000
-; DWARF3-NEXT: {{.*}}: DW_OP_breg6 RBP-40, DW_OP_deref, DW_OP_deref
-; DWARF3-NEXT: {{.*}}: DW_OP_breg0 RAX+0, DW_OP_deref
+; DWARF3-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref
; CHECK-NOT: DW_TAG
; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla")
@@ -19,8 +17,8 @@
; Check the DEBUG_VALUE comments for good measure.
; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK
; vla should have a register-indirect address at one point.
-; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rax+0]
-; ASM-CHECK: DW_OP_breg6
+; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rcx+0]
+; ASM-CHECK: DW_OP_breg2
; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT
; PRETTY-PRINT: DIExpression(DW_OP_deref)
diff --git a/llvm/test/DebugInfo/X86/parameters.ll b/llvm/test/DebugInfo/X86/parameters.ll
index 9b139eaffffc..5f4edd5b963d 100644
--- a/llvm/test/DebugInfo/X86/parameters.ll
+++ b/llvm/test/DebugInfo/X86/parameters.ll
@@ -37,8 +37,8 @@
; CHECK: DW_AT_location{{.*}}(DW_OP_fbreg +23)
; CHECK: DW_TAG_formal_parameter
; CHECK: DW_AT_location{{.*}}(
-; CHECK-NEXT: {{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref, DW_OP_deref
-; CHECK-NEXT: {{.*}}: DW_OP_breg4 RSI+0, DW_OP_deref)
+; CHECK-NEXT: {{.*}}: DW_OP_breg4 RSI+0, DW_OP_deref
+; CHECK-NEXT: {{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref, DW_OP_deref)
; CHECK-NOT: DW_TAG
; CHECK: DW_AT_name{{.*}} = "g"
diff --git a/llvm/test/DebugInfo/X86/pieces-1.ll b/llvm/test/DebugInfo/X86/pieces-1.ll
index f614f78c6581..c333f2b33f25 100644
--- a/llvm/test/DebugInfo/X86/pieces-1.ll
+++ b/llvm/test/DebugInfo/X86/pieces-1.ll
@@ -16,7 +16,7 @@
; CHECK: .debug_loc contents:
;
-; CHECK: (0x0000000000000006, 0x[[LTMP3:.*]]): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_reg0 RAX, DW_OP_piece 0x4
+; CHECK: (0x0000000000000000, 0x[[LTMP3:.*]]): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_reg4 RSI, DW_OP_piece 0x4
; 0x0000000000000006 - 0x0000000000000008: rbp-8, piece 0x8, rax, piece 0x4 )
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/test/DebugInfo/X86/prologue-stack.ll b/llvm/test/DebugInfo/X86/prologue-stack.ll
index 299f10f18b48..6072543861d3 100644
--- a/llvm/test/DebugInfo/X86/prologue-stack.ll
+++ b/llvm/test/DebugInfo/X86/prologue-stack.ll
@@ -6,7 +6,7 @@
; return 0;
; }
-define i32 @isel_line_test2(i32 %arg) nounwind uwtable !dbg !5 {
+define i32 @isel_line_test2() nounwind uwtable !dbg !5 {
; The stack adjustment should be part of the prologue.
; CHECK: isel_line_test2:
; CHECK: {{subq|leaq}} {{.*}}, %rsp
@@ -14,9 +14,8 @@ define i32 @isel_line_test2(i32 %arg) nounwind uwtable !dbg !5 {
; CHECK: movl $400, %edi
; CHECK: callq callme
entry:
- ; %arg should get spilled here, so we need to setup a stackframe
%call = call i32 @callme(i32 400), !dbg !10
- ret i32 %arg, !dbg !12
+ ret i32 0, !dbg !12
}
declare i32 @callme(i32)
diff --git a/llvm/test/DebugInfo/X86/reference-argument.ll b/llvm/test/DebugInfo/X86/reference-argument.ll
index 3beb16e2ff14..4bdb44a796ed 100644
--- a/llvm/test/DebugInfo/X86/reference-argument.ll
+++ b/llvm/test/DebugInfo/X86/reference-argument.ll
@@ -13,7 +13,7 @@
; CHECK-NOT: DW_TAG_subprogram
; CHECK: DW_TAG_formal_parameter
; CHECK-NEXT: DW_AT_location
-; CHECK-NEXT: DW_OP_breg5 RDI+0
+; CHECK-NEXT: DW_OP_breg4 RSI+0
; CHECK-NEXT: DW_AT_name {{.*}} "v"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll b/llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll
index fb0c5779ca8b..82c852034aeb 100644
--- a/llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll
+++ b/llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s | FileCheck -check-prefixes=CHECK,OPT %s
-; RUN: llc -O0 < %s | FileCheck -check-prefixes=CHECK,OPTNONE %s
+; RUN: llc < %s | FileCheck %s
+; RUN: llc -O0 < %s | FileCheck %s
; Make sure we insert DW_OP_deref when spilling indirect DBG_VALUE instructions.
@@ -21,18 +21,10 @@
; }
; CHECK-LABEL: _Z10get_stringv:
-
-; OPT: #DEBUG_VALUE: get_string:result <- [$rdi+0]
-; OPT: movq %rdi, [[OFFS:[0-9]+]](%rsp) # 8-byte Spill
-; OPT: #DEBUG_VALUE: get_string:result <- [DW_OP_plus_uconst [[OFFS]], DW_OP_deref] [$rsp+0]
-; OPT: callq _ZN6stringC1Ei
-
-; OPTNONE: #DEBUG_VALUE: get_string:result <- [DW_OP_deref] [$rsp+0]
-; OPTNONE: movq %rdi, %rax
-; OPTNONE: movq %rax, [[OFFS:[0-9]+]](%rsp) # 8-byte Spill
-; OPTNONE: #DEBUG_VALUE: get_string:result <- [$rdi+0]
-; OPTNONE: callq _ZN6stringC1Ei
-
+; CHECK: #DEBUG_VALUE: get_string:result <- [$rdi+0]
+; CHECK: movq %rdi, [[OFFS:[0-9]+]](%rsp) # 8-byte Spill
+; CHECK: #DEBUG_VALUE: get_string:result <- [DW_OP_plus_uconst [[OFFS]], DW_OP_deref] [$rsp+0]
+; CHECK: callq _ZN6stringC1Ei
; CHECK: #APP
; CHECK: #NO_APP
diff --git a/llvm/test/DebugInfo/X86/sret.ll b/llvm/test/DebugInfo/X86/sret.ll
index f245cbaa627c..c87b57c524db 100644
--- a/llvm/test/DebugInfo/X86/sret.ll
+++ b/llvm/test/DebugInfo/X86/sret.ll
@@ -3,17 +3,16 @@
; Based on the debuginfo-tests/sret.cpp code.
-; CHECK-DWO: DW_AT_GNU_dwo_id (0x409e35dbb641730e)
-; CHECK-DWO: DW_AT_GNU_dwo_id (0x409e35dbb641730e)
+; CHECK-DWO: DW_AT_GNU_dwo_id (0x7e62530711b94622)
+; CHECK-DWO: DW_AT_GNU_dwo_id (0x7e62530711b94622)
-; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,FASTISEL %s
-; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,SDAG %s
+; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck %s
+; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck %s
; CHECK: _ZN1B9AInstanceEv
; CHECK: DW_TAG_variable
; CHECK-NEXT: DW_AT_location (0x00000000
-; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-32, DW_OP_deref
-; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0)
-; SDAG-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0)
+; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0
+; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-24, DW_OP_deref)
; CHECK-NEXT: DW_AT_name {{.*}}"a"
%class.A = type { i32 (...)**, i32 }
diff --git a/llvm/test/DebugInfo/X86/subreg.ll b/llvm/test/DebugInfo/X86/subreg.ll
index 37f3181d8798..1a0feb95e931 100644
--- a/llvm/test/DebugInfo/X86/subreg.ll
+++ b/llvm/test/DebugInfo/X86/subreg.ll
@@ -3,7 +3,7 @@
; We are testing that a value in a 16 bit register gets reported as
; being in its superregister.
-; CHECK: .byte 80 # super-register DW_OP_reg0
+; CHECK: .byte 85 # super-register DW_OP_reg5
; No need to a piece at offset 0.
; CHECK-NOT: DW_OP_piece
; CHECK-NOT: DW_OP_bit_piece