[llvm] cbdccb3 - [RA] Split a virtual register in cold blocks if it is not assigned preferred physical register

Guozhi Wei via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 15 12:55:32 PDT 2023


Author: Guozhi Wei
Date: 2023-09-15T19:52:50Z
New Revision: cbdccb30c23f71f20d05b19256232419e7c5e517

URL: https://github.com/llvm/llvm-project/commit/cbdccb30c23f71f20d05b19256232419e7c5e517
DIFF: https://github.com/llvm/llvm-project/commit/cbdccb30c23f71f20d05b19256232419e7c5e517.diff

LOG: [RA] Split a virtual register in cold blocks if it is not assigned preferred physical register

If a virtual register is not assigned preferred physical register, it means some
COPY instructions will be changed to real register move instructions. In this
case we can try to split the virtual register in colder blocks, if success, the
original COPY instructions can be deleted, and the new COPY instructions in
colder blocks will be generated as register move instructions. It results in
fewer dynamic register move instructions executed.

The new test case split-reg-with-hint.ll gives an example, the hot path contains
24 instructions without this patch, now it is only 4 instructions with this
patch.

Differential Revision: https://reviews.llvm.org/D156491

Added: 
    llvm/test/CodeGen/X86/split-reg-with-hint.ll

Modified: 
    llvm/lib/CodeGen/RegAllocGreedy.cpp
    llvm/lib/CodeGen/RegAllocGreedy.h
    llvm/test/CodeGen/AArch64/cgp-usubo.ll
    llvm/test/CodeGen/AArch64/csr-split.ll
    llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
    llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
    llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
    llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
    llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
    llvm/test/CodeGen/ARM/csr-split.ll
    llvm/test/CodeGen/ARM/divmod-eabi.ll
    llvm/test/CodeGen/ARM/thumb2-size-opt.ll
    llvm/test/CodeGen/AVR/cttz.ll
    llvm/test/CodeGen/Hexagon/noreturn-noepilog.ll
    llvm/test/CodeGen/PowerPC/csr-split.ll
    llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
    llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
    llvm/test/CodeGen/PowerPC/subreg-postra.ll
    llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll
    llvm/test/CodeGen/PowerPC/tls-cse.ll
    llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll
    llvm/test/CodeGen/RISCV/fmax-fmin.ll
    llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
    llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
    llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
    llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
    llvm/test/CodeGen/X86/cgp-usubo.ll
    llvm/test/CodeGen/X86/csr-split.ll
    llvm/test/CodeGen/X86/ragreedy-bug.ll
    llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
    llvm/test/CodeGen/X86/speculative-load-hardening.ll
    llvm/test/CodeGen/X86/statepoint-call-lowering.ll
    llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
    llvm/test/CodeGen/X86/statepoint-ra.ll
    llvm/test/CodeGen/X86/statepoint-vreg-details.ll
    llvm/test/CodeGen/X86/statepoint-vreg.ll
    llvm/test/DebugInfo/ARM/sdag-split-arg.ll
    llvm/test/DebugInfo/X86/live-debug-values.ll
    llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 68f6ea3268a9ae8..f97cb1a0fb722a6 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -141,6 +141,12 @@ static cl::opt<bool> GreedyReverseLocalAssignment(
              "shorter local live ranges will tend to be allocated first"),
     cl::Hidden);
 
+static cl::opt<unsigned> SplitThresholdForRegWithHint(
+    "split-threshold-for-reg-with-hint",
+    cl::desc("The threshold for splitting a virtual register with a hint, in "
+             "percentate"),
+    cl::init(75), cl::Hidden);
+
 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                        createGreedyRegisterAllocator);
 
@@ -422,6 +428,11 @@ MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
         evictInterference(VirtReg, PhysHint, NewVRegs);
         return PhysHint;
       }
+
+      // We can also split the virtual register in cold blocks.
+      if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order))
+        return 0;
+
       // Record the missed hint, we may be able to recover
       // at the end if the surrounding allocation changed.
       SetOfBrokenHints.insert(&VirtReg);
@@ -1064,86 +1075,98 @@ MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
   return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
 }
 
-unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
+unsigned
+RAGreedy::calculateRegionSplitCostAroundReg(MCPhysReg PhysReg,
                                             AllocationOrder &Order,
                                             BlockFrequency &BestCost,
                                             unsigned &NumCands,
-                                            bool IgnoreCSR) {
-  unsigned BestCand = NoCand;
-  for (MCPhysReg PhysReg : Order) {
-    assert(PhysReg);
-    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
-      continue;
-
-    // Discard bad candidates before we run out of interference cache cursors.
-    // This will only affect register classes with a lot of registers (>32).
-    if (NumCands == IntfCache.getMaxCursors()) {
-      unsigned WorstCount = ~0u;
-      unsigned Worst = 0;
-      for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
-        if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
-          continue;
-        unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
-        if (Count < WorstCount) {
-          Worst = CandIndex;
-          WorstCount = Count;
-        }
+                                            unsigned &BestCand) {
+  // Discard bad candidates before we run out of interference cache cursors.
+  // This will only affect register classes with a lot of registers (>32).
+  if (NumCands == IntfCache.getMaxCursors()) {
+    unsigned WorstCount = ~0u;
+    unsigned Worst = 0;
+    for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
+      if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
+        continue;
+      unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
+      if (Count < WorstCount) {
+        Worst = CandIndex;
+        WorstCount = Count;
       }
-      --NumCands;
-      GlobalCand[Worst] = GlobalCand[NumCands];
-      if (BestCand == NumCands)
-        BestCand = Worst;
     }
+    --NumCands;
+    GlobalCand[Worst] = GlobalCand[NumCands];
+    if (BestCand == NumCands)
+      BestCand = Worst;
+  }
 
-    if (GlobalCand.size() <= NumCands)
-      GlobalCand.resize(NumCands+1);
-    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
-    Cand.reset(IntfCache, PhysReg);
+  if (GlobalCand.size() <= NumCands)
+    GlobalCand.resize(NumCands+1);
+  GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+  Cand.reset(IntfCache, PhysReg);
 
-    SpillPlacer->prepare(Cand.LiveBundles);
-    BlockFrequency Cost;
-    if (!addSplitConstraints(Cand.Intf, Cost)) {
-      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
-      continue;
-    }
-    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
-               MBFI->printBlockFreq(dbgs(), Cost));
-    if (Cost >= BestCost) {
-      LLVM_DEBUG({
-        if (BestCand == NoCand)
-          dbgs() << " worse than no bundles\n";
-        else
-          dbgs() << " worse than "
-                 << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
-      });
-      continue;
-    }
-    if (!growRegion(Cand)) {
-      LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
-      continue;
-    }
+  SpillPlacer->prepare(Cand.LiveBundles);
+  BlockFrequency Cost;
+  if (!addSplitConstraints(Cand.Intf, Cost)) {
+    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
+    return BestCand;
+  }
+  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
+             MBFI->printBlockFreq(dbgs(), Cost));
+  if (Cost >= BestCost) {
+    LLVM_DEBUG({
+      if (BestCand == NoCand)
+        dbgs() << " worse than no bundles\n";
+      else
+        dbgs() << " worse than "
+               << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+    });
+    return BestCand;
+  }
+  if (!growRegion(Cand)) {
+    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+    return BestCand;
+  }
+
+  SpillPlacer->finish();
+
+  // No live bundles, defer to splitSingleBlocks().
+  if (!Cand.LiveBundles.any()) {
+    LLVM_DEBUG(dbgs() << " no bundles.\n");
+    return BestCand;
+  }
+
+  Cost += calcGlobalSplitCost(Cand, Order);
+  LLVM_DEBUG({
+    dbgs() << ", total = ";
+    MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
+    for (int I : Cand.LiveBundles.set_bits())
+      dbgs() << " EB#" << I;
+    dbgs() << ".\n";
+  });
+  if (Cost < BestCost) {
+    BestCand = NumCands;
+    BestCost = Cost;
+  }
+  ++NumCands;
 
-    SpillPlacer->finish();
+  return BestCand;
+}
 
-    // No live bundles, defer to splitSingleBlocks().
-    if (!Cand.LiveBundles.any()) {
-      LLVM_DEBUG(dbgs() << " no bundles.\n");
+unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
+                                            AllocationOrder &Order,
+                                            BlockFrequency &BestCost,
+                                            unsigned &NumCands,
+                                            bool IgnoreCSR) {
+  unsigned BestCand = NoCand;
+  for (MCPhysReg PhysReg : Order) {
+    assert(PhysReg);
+    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
       continue;
-    }
 
-    Cost += calcGlobalSplitCost(Cand, Order);
-    LLVM_DEBUG({
-      dbgs() << ", total = ";
-      MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
-      for (int I : Cand.LiveBundles.set_bits())
-        dbgs() << " EB#" << I;
-      dbgs() << ".\n";
-    });
-    if (Cost < BestCost) {
-      BestCand = NumCands;
-      BestCost = Cost;
-    }
-    ++NumCands;
+    calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands,
+                                      BestCand);
   }
 
   return BestCand;
@@ -1189,6 +1212,53 @@ unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
   return 0;
 }
 
+// VirtReg has a physical Hint, this function tries to split VirtReg around
+// Hint if we can place new COPY instructions in cold blocks.
+bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
+                                     const LiveInterval &VirtReg,
+                                     SmallVectorImpl<Register> &NewVRegs,
+                                     AllocationOrder &Order) {
+  BlockFrequency Cost = 0;
+  Register Reg = VirtReg.reg();
+
+  // Compute the cost of assigning a non Hint physical register to VirtReg.
+  // We define it as the total frequency of broken COPY instructions to/from
+  // Hint register, and after split, they can be deleted.
+  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
+    if (!TII->isFullCopyInstr(Instr))
+      continue;
+    Register OtherReg = Instr.getOperand(1).getReg();
+    if (OtherReg == Reg) {
+      OtherReg = Instr.getOperand(0).getReg();
+      if (OtherReg == Reg)
+        continue;
+      // Check if VirtReg interferes with OtherReg after this COPY instruction.
+      if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
+        continue;
+    }
+    MCRegister OtherPhysReg =
+        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
+    if (OtherPhysReg == Hint)
+      Cost += MBFI->getBlockFreq(Instr.getParent());
+  }
+
+  // Decrease the cost so it will be split in colder blocks.
+  BranchProbability Threshold(SplitThresholdForRegWithHint, 100);
+  Cost *= Threshold;
+  if (Cost == 0)
+    return false;
+
+  unsigned NumCands = 0;
+  unsigned BestCand = NoCand;
+  SA->analyze(&VirtReg);
+  calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand);
+  if (BestCand == NoCand)
+    return false;
+
+  doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 //                            Per-Block Splitting
 //===----------------------------------------------------------------------===//
@@ -2329,6 +2399,9 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
     } else
       return PhysReg;
   }
+  // Non emtpy NewVRegs means VirtReg has been split.
+  if (!NewVRegs.empty())
+    return 0;
 
   LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
   LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "

diff  --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h
index 0f8f9a7d58112b8..1579e697ce3f5b0 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -348,6 +348,12 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
                       const SmallVirtRegSet &);
   MCRegister tryRegionSplit(const LiveInterval &, AllocationOrder &,
                             SmallVectorImpl<Register> &);
+  /// Calculate cost of region splitting around the specified register.
+  unsigned calculateRegionSplitCostAroundReg(MCPhysReg PhysReg,
+                                             AllocationOrder &Order,
+                                             BlockFrequency &BestCost,
+                                             unsigned &NumCands,
+                                             unsigned &BestCand);
   /// Calculate cost of region splitting.
   unsigned calculateRegionSplitCost(const LiveInterval &VirtReg,
                                     AllocationOrder &Order,
@@ -356,6 +362,10 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
   /// Perform region splitting.
   unsigned doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
                          bool HasCompact, SmallVectorImpl<Register> &NewVRegs);
+  /// Try to split VirtReg around physical Hint register.
+  bool trySplitAroundHintReg(MCPhysReg Hint, const LiveInterval &VirtReg,
+                             SmallVectorImpl<Register> &NewVRegs,
+                             AllocationOrder &Order);
   /// Check other options before using a callee-saved register for the first
   /// time.
   MCRegister tryAssignCSRFirstTime(const LiveInterval &VirtReg,

diff  --git a/llvm/test/CodeGen/AArch64/cgp-usubo.ll b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
index 2f081cf96d8b88f..d307107fc07ee68 100644
--- a/llvm/test/CodeGen/AArch64/cgp-usubo.ll
+++ b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
@@ -161,13 +161,13 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) nounwin
 ; CHECK-NEXT:    tbz w3, #0, .LBB8_3
 ; CHECK-NEXT:  // %bb.1: // %t
 ; CHECK-NEXT:    cmp x0, x1
-; CHECK-NEXT:    mov x23, x0
+; CHECK-NEXT:    mov x22, x0
 ; CHECK-NEXT:    mov x20, x2
 ; CHECK-NEXT:    cset w21, lo
-; CHECK-NEXT:    mov x22, x1
+; CHECK-NEXT:    mov x23, x1
 ; CHECK-NEXT:    mov w0, w21
 ; CHECK-NEXT:    bl call
-; CHECK-NEXT:    subs x8, x23, x22
+; CHECK-NEXT:    subs x8, x22, x23
 ; CHECK-NEXT:    b.hs .LBB8_3
 ; CHECK-NEXT:  // %bb.2: // %end
 ; CHECK-NEXT:    mov w19, w21

diff  --git a/llvm/test/CodeGen/AArch64/csr-split.ll b/llvm/test/CodeGen/AArch64/csr-split.ll
index da73c3a13a2e923..7b092b00b965572 100644
--- a/llvm/test/CodeGen/AArch64/csr-split.ll
+++ b/llvm/test/CodeGen/AArch64/csr-split.ll
@@ -83,19 +83,18 @@ declare signext i32 @callNonVoid(ptr) local_unnamed_addr
 define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr uwtable  {
 ; CHECK-LABEL: test2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    .cfi_remember_state
 ; CHECK-NEXT:    cbz x0, .LBB1_3
 ; CHECK-NEXT:  // %bb.1: // %entry
 ; CHECK-NEXT:    adrp x8, a
-; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    ldrsw x8, [x8, :lo12:a]
 ; CHECK-NEXT:    cmp x8, x0
 ; CHECK-NEXT:    b.ne .LBB1_3
 ; CHECK-NEXT:  // %bb.2: // %if.then2
+; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    bl callVoid
 ; CHECK-NEXT:    mov x0, x19
 ; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
@@ -104,34 +103,28 @@ define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr uwtable  {
 ; CHECK-NEXT:    .cfi_restore w30
 ; CHECK-NEXT:    b callNonVoid
 ; CHECK-NEXT:  .LBB1_3: // %return
-; CHECK-NEXT:    .cfi_restore_state
 ; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    .cfi_restore w19
-; CHECK-NEXT:    .cfi_restore w30
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-APPLE-LABEL: test2:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
-; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
-; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
-; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
-; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
-; CHECK-APPLE-NEXT:    .cfi_remember_state
 ; CHECK-APPLE-NEXT:    cbz x0, LBB1_3
 ; CHECK-APPLE-NEXT:  ; %bb.1: ; %entry
 ; CHECK-APPLE-NEXT:  Lloh2:
 ; CHECK-APPLE-NEXT:    adrp x8, _a at PAGE
-; CHECK-APPLE-NEXT:    mov x19, x0
 ; CHECK-APPLE-NEXT:  Lloh3:
 ; CHECK-APPLE-NEXT:    ldrsw x8, [x8, _a at PAGEOFF]
 ; CHECK-APPLE-NEXT:    cmp x8, x0
 ; CHECK-APPLE-NEXT:    b.ne LBB1_3
 ; CHECK-APPLE-NEXT:  ; %bb.2: ; %if.then2
+; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-NEXT:    mov x19, x0
 ; CHECK-APPLE-NEXT:    bl _callVoid
 ; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-APPLE-NEXT:    mov x0, x19
@@ -143,15 +136,7 @@ define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr uwtable  {
 ; CHECK-APPLE-NEXT:    .cfi_restore w20
 ; CHECK-APPLE-NEXT:    b _callNonVoid
 ; CHECK-APPLE-NEXT:  LBB1_3: ; %return
-; CHECK-APPLE-NEXT:    .cfi_restore_state
-; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-APPLE-NEXT:    mov w0, wzr
-; CHECK-APPLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-APPLE-NEXT:    .cfi_restore w30
-; CHECK-APPLE-NEXT:    .cfi_restore w29
-; CHECK-APPLE-NEXT:    .cfi_restore w19
-; CHECK-APPLE-NEXT:    .cfi_restore w20
 ; CHECK-APPLE-NEXT:    ret
 ; CHECK-APPLE-NEXT:    .loh AdrpLdr Lloh2, Lloh3
 entry:

diff  --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
index 4a903863acef7d9..a965a20244b4e75 100644
--- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
@@ -12,8 +12,7 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
 ; CHECK-NEXT:    s_load_dwordx8 s[36:43], s[8:9], 0x0
 ; CHECK-NEXT:    s_add_u32 s0, s0, s17
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    s_mov_b64 s[34:35], s[8:9]
-; CHECK-NEXT:    s_mov_b32 s8, 0
+; CHECK-NEXT:    s_mov_b32 s12, 0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_cmp_lg_u32 s40, 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB0_8
@@ -22,51 +21,54 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB0_4
 ; CHECK-NEXT:  ; %bb.2: ; %if.else251.i.i
 ; CHECK-NEXT:    s_cmp_lg_u32 s43, 0
-; CHECK-NEXT:    s_mov_b32 s12, 0
-; CHECK-NEXT:    s_cselect_b32 s8, -1, 0
-; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s8
+; CHECK-NEXT:    s_mov_b32 s17, 0
+; CHECK-NEXT:    s_cselect_b32 s12, -1, 0
+; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s12
 ; CHECK-NEXT:    s_cbranch_vccz .LBB0_5
 ; CHECK-NEXT:  ; %bb.3:
 ; CHECK-NEXT:    s_mov_b32 s36, 0
-; CHECK-NEXT:    s_andn2_b32 vcc_lo, exec_lo, s8
+; CHECK-NEXT:    s_andn2_b32 vcc_lo, exec_lo, s12
 ; CHECK-NEXT:    s_cbranch_vccz .LBB0_6
 ; CHECK-NEXT:    s_branch .LBB0_7
 ; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    s_mov_b32 s10, s8
-; CHECK-NEXT:    s_mov_b32 s11, s8
-; CHECK-NEXT:    s_mov_b32 s9, s8
-; CHECK-NEXT:    s_mov_b64 s[38:39], s[10:11]
-; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; CHECK-NEXT:    s_mov_b32 s14, s12
+; CHECK-NEXT:    s_mov_b32 s15, s12
+; CHECK-NEXT:    s_mov_b32 s13, s12
+; CHECK-NEXT:    s_mov_b64 s[38:39], s[14:15]
+; CHECK-NEXT:    s_mov_b64 s[36:37], s[12:13]
 ; CHECK-NEXT:    s_branch .LBB0_7
 ; CHECK-NEXT:  .LBB0_5: ; %if.then263.i.i
-; CHECK-NEXT:    v_cmp_lt_f32_e64 s8, s41, 0
+; CHECK-NEXT:    v_cmp_lt_f32_e64 s12, s41, 0
 ; CHECK-NEXT:    s_mov_b32 s36, 1.0
-; CHECK-NEXT:    s_mov_b32 s12, 0x7fc00000
+; CHECK-NEXT:    s_mov_b32 s17, 0x7fc00000
 ; CHECK-NEXT:    s_mov_b32 s37, s36
 ; CHECK-NEXT:    s_mov_b32 s38, s36
 ; CHECK-NEXT:    s_mov_b32 s39, s36
-; CHECK-NEXT:    s_andn2_b32 vcc_lo, exec_lo, s8
+; CHECK-NEXT:    s_andn2_b32 vcc_lo, exec_lo, s12
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB0_7
 ; CHECK-NEXT:  .LBB0_6: ; %if.end273.i.i
-; CHECK-NEXT:    s_add_u32 s8, s34, 40
-; CHECK-NEXT:    s_addc_u32 s9, s35, 0
+; CHECK-NEXT:    s_add_u32 s12, s8, 40
+; CHECK-NEXT:    s_addc_u32 s13, s9, 0
 ; CHECK-NEXT:    s_getpc_b64 s[18:19]
 ; CHECK-NEXT:    s_add_u32 s18, s18, _Z3dotDv3_fS_ at gotpcrel32@lo+4
 ; CHECK-NEXT:    s_addc_u32 s19, s19, _Z3dotDv3_fS_ at gotpcrel32@hi+12
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[18:19], 0x0
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 10, v1
-; CHECK-NEXT:    v_add_f32_e64 v1, s12, s36
+; CHECK-NEXT:    v_add_f32_e64 v1, s17, s36
+; CHECK-NEXT:    s_mov_b64 s[34:35], s[8:9]
+; CHECK-NEXT:    s_mov_b64 s[8:9], s[12:13]
 ; CHECK-NEXT:    s_mov_b32 s12, s14
-; CHECK-NEXT:    s_mov_b32 s13, s15
-; CHECK-NEXT:    s_mov_b32 s14, s16
 ; CHECK-NEXT:    v_or3_b32 v31, v0, v3, v2
 ; CHECK-NEXT:    v_mov_b32_e32 v0, v1
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    s_mov_b32 s13, s15
+; CHECK-NEXT:    s_mov_b32 s14, s16
 ; CHECK-NEXT:    s_mov_b32 s36, 0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; CHECK-NEXT:    s_mov_b64 s[8:9], s[34:35]
 ; CHECK-NEXT:    s_mov_b32 s37, s36
 ; CHECK-NEXT:    s_mov_b32 s38, s36
 ; CHECK-NEXT:    s_mov_b32 s39, s36
@@ -77,7 +79,7 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CHECK-NEXT:  .LBB0_8: ; %kernel_direct_lighting.exit
-; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[34:35], 0x20
+; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x20
 ; CHECK-NEXT:    v_mov_b32_e32 v0, s36
 ; CHECK-NEXT:    v_mov_b32_e32 v4, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v1, s37

diff  --git a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
index 3374805409b3a74..f59c42283e981f5 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
@@ -67,7 +67,7 @@ body:             |
   ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY killed renamable $sgpr36_sgpr37
   ; CHECK-NEXT:   $sgpr12 = COPY killed renamable $sgpr42
   ; CHECK-NEXT:   $sgpr13 = COPY killed renamable $sgpr33
-  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr68_sgpr69, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr68_sgpr69, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5 = COPY $exec, implicit-def $exec
   ; CHECK-NEXT:   dead renamable $sgpr6_sgpr7 = IMPLICIT_DEF
@@ -85,7 +85,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75
   ; CHECK-NEXT:   renamable $sgpr6 = S_LSHL_B32 renamable $sgpr67, 1, implicit-def dead $scc
-  ; CHECK-NEXT:   dead [[COPY]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[COPY]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec
+  ; CHECK-NEXT:   dead [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32_:%[0-9]+]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32_]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.1(0x40000000)
@@ -114,7 +114,7 @@ body:             |
   ; CHECK-NEXT:   renamable $sgpr87 = COPY renamable $sgpr44
   ; CHECK-NEXT:   renamable $sgpr88 = COPY renamable $sgpr44
   ; CHECK-NEXT:   renamable $sgpr89 = COPY renamable $sgpr44
-  ; CHECK-NEXT:   dead %18:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit $exec
+  ; CHECK-NEXT:   dead [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit $exec
   ; CHECK-NEXT:   $exec = S_XOR_B64_term $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
   ; CHECK-NEXT:   S_BRANCH %bb.1
@@ -125,7 +125,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
   ; CHECK-NEXT:   dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr66, 1, implicit-def dead $scc
-  ; CHECK-NEXT:   dead %16:vreg_1024 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75
+  ; CHECK-NEXT:   dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.5:
   bb.0:

diff  --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
index 3cdf238ddd4b57f..84a0cc6c9220a10 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
@@ -55,15 +55,14 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_mov_b64 s[16:17], s[4:5]
 ; GFX11-NEXT:    v_mov_b32_e32 v31, v0
 ; GFX11-NEXT:    s_load_b32 s24, s[16:17], 0x24
-; GFX11-NEXT:    s_mov_b32 s18, s14
 ; GFX11-NEXT:    s_mov_b32 s12, s13
 ; GFX11-NEXT:    s_mov_b64 s[10:11], s[6:7]
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
 ; GFX11-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
 ; GFX11-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX11-NEXT:    s_mov_b32 s3, 0
 ; GFX11-NEXT:    s_mov_b32 s0, -1
-; GFX11-NEXT:    s_mov_b32 s19, exec_lo
+; GFX11-NEXT:    s_mov_b32 s18, exec_lo
 ; GFX11-NEXT:    s_mov_b32 s32, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    v_mul_lo_u32 v0, s24, v0
@@ -72,11 +71,11 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_cbranch_execz .LBB2_13
 ; GFX11-NEXT:  ; %bb.1: ; %bb14
 ; GFX11-NEXT:    s_load_b128 s[20:23], s[16:17], 0x2c
+; GFX11-NEXT:    s_mov_b32 s19, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_bitcmp1_b32 s21, 0
 ; GFX11-NEXT:    s_cselect_b32 s25, -1, 0
 ; GFX11-NEXT:    s_bitcmp0_b32 s21, 0
-; GFX11-NEXT:    s_mov_b32 s21, 0
 ; GFX11-NEXT:    s_cbranch_scc0 .LBB2_3
 ; GFX11-NEXT:  ; %bb.2: ; %bb15
 ; GFX11-NEXT:    s_add_u32 s8, s16, 0x58
@@ -84,11 +83,13 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_getpc_b64 s[0:1]
 ; GFX11-NEXT:    s_add_u32 s0, s0, f0 at gotpcrel32@lo+4
 ; GFX11-NEXT:    s_addc_u32 s1, s1, f0 at gotpcrel32@hi+12
-; GFX11-NEXT:    s_mov_b32 s13, s18
+; GFX11-NEXT:    s_mov_b32 s13, s14
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT:    s_mov_b32 s21, s14
 ; GFX11-NEXT:    s_mov_b32 s14, s15
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; GFX11-NEXT:    s_mov_b32 s14, s21
 ; GFX11-NEXT:    s_mov_b32 s1, -1
 ; GFX11-NEXT:    s_and_not1_b32 vcc_lo, exec_lo, s3
 ; GFX11-NEXT:    s_cbranch_vccz .LBB2_4
@@ -125,8 +126,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_mul_i32 s2, s2, s20
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_or_b32 s2, s24, s2
-; GFX11-NEXT:    s_lshl_b64 s[22:23], s[2:3], 1
-; GFX11-NEXT:    global_load_u16 v2, v1, s[22:23]
+; GFX11-NEXT:    s_lshl_b64 s[20:21], s[2:3], 1
+; GFX11-NEXT:    global_load_u16 v2, v1, s[20:21]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
@@ -165,13 +166,13 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
 ; GFX11-NEXT:    s_cbranch_vccz .LBB2_10
 ; GFX11-NEXT:  ; %bb.11: ; %Flow6
-; GFX11-NEXT:    s_mov_b32 s21, -1
+; GFX11-NEXT:    s_mov_b32 s19, -1
 ; GFX11-NEXT:  .LBB2_12: ; %Flow11
 ; GFX11-NEXT:    s_and_b32 s3, s1, exec_lo
-; GFX11-NEXT:    s_or_not1_b32 s0, s21, exec_lo
+; GFX11-NEXT:    s_or_not1_b32 s0, s19, exec_lo
 ; GFX11-NEXT:  .LBB2_13: ; %Flow9
-; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s19
-; GFX11-NEXT:    s_and_saveexec_b32 s19, s0
+; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s18
+; GFX11-NEXT:    s_and_saveexec_b32 s18, s0
 ; GFX11-NEXT:    s_cbranch_execz .LBB2_15
 ; GFX11-NEXT:  ; %bb.14: ; %bb43
 ; GFX11-NEXT:    s_add_u32 s8, s16, 0x58
@@ -179,14 +180,14 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_getpc_b64 s[0:1]
 ; GFX11-NEXT:    s_add_u32 s0, s0, f0 at gotpcrel32@lo+4
 ; GFX11-NEXT:    s_addc_u32 s1, s1, f0 at gotpcrel32@hi+12
-; GFX11-NEXT:    s_mov_b32 s13, s18
+; GFX11-NEXT:    s_mov_b32 s13, s14
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 ; GFX11-NEXT:    s_mov_b32 s14, s15
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; GFX11-NEXT:    s_or_b32 s3, s3, exec_lo
 ; GFX11-NEXT:  .LBB2_15: ; %Flow14
-; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s19
+; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s18
 ; GFX11-NEXT:    s_and_saveexec_b32 s0, s3
 ; GFX11-NEXT:  ; %bb.16: ; %UnifiedUnreachableBlock
 ; GFX11-NEXT:    ; divergent unreachable

diff  --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
index 0216570253a4c48..2b613cbbabeee79 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
@@ -40,91 +40,71 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $sgpr33 = COPY $sgpr14
-  ; CHECK-NEXT:   renamable $sgpr34_sgpr35 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   renamable $sgpr18_sgpr19 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   renamable $sgpr20_sgpr21 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   renamable $sgpr22_sgpr23 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   renamable $sgpr52 = S_MOV_B32 0
-  ; CHECK-NEXT:   renamable $sgpr24_sgpr25 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec
-  ; CHECK-NEXT:   renamable $sgpr100_sgpr101 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   renamable $sgpr53 = S_MOV_B32 1083786240
-  ; CHECK-NEXT:   SI_SPILL_S1024_SAVE renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5)
+  ; CHECK-NEXT:   renamable $sgpr34_sgpr35 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr36_sgpr37 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr38_sgpr39 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr40_sgpr41 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr60 = S_MOV_B32 0
+  ; CHECK-NEXT:   renamable $sgpr42_sgpr43 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr44_sgpr45 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr61 = S_MOV_B32 1083786240
   ; CHECK-NEXT:   S_BRANCH %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.17(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, renamable $sgpr100_sgpr101, implicit-def dead $scc
+  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, renamable $sgpr44_sgpr45, implicit-def dead $scc
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vreg_1024_align2 = COPY [[COPY]]
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.17
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   successors: %bb.11(0x40000000), %bb.5(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr36 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr37 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr38 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr39 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr40 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr41 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr42 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr43 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr44 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr45 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr46 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr47 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr48 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr49 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr50 = COPY renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr51 = COPY killed renamable $sgpr68
-  ; CHECK-NEXT:   renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr52 = COPY killed renamable $sgpr84
-  ; CHECK-NEXT:   renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr53 = COPY killed renamable $sgpr72
-  ; CHECK-NEXT:   renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr54 = COPY killed renamable $sgpr72
-  ; CHECK-NEXT:   renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr55 = COPY killed renamable $sgpr72
-  ; CHECK-NEXT:   renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr56 = COPY killed renamable $sgpr72
-  ; CHECK-NEXT:   renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr57 = COPY killed renamable $sgpr76
-  ; CHECK-NEXT:   renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr58 = COPY killed renamable $sgpr76
-  ; CHECK-NEXT:   renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr59 = COPY killed renamable $sgpr76
-  ; CHECK-NEXT:   renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr60 = COPY killed renamable $sgpr76
-  ; CHECK-NEXT:   renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr61 = COPY killed renamable $sgpr80
-  ; CHECK-NEXT:   renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr62 = COPY killed renamable $sgpr80
-  ; CHECK-NEXT:   renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr63 = COPY killed renamable $sgpr80
-  ; CHECK-NEXT:   renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr64 = COPY killed renamable $sgpr80
-  ; CHECK-NEXT:   renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr65 = COPY killed renamable $sgpr84
-  ; CHECK-NEXT:   renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr66 = COPY killed renamable $sgpr84
-  ; CHECK-NEXT:   renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr67 = COPY killed renamable $sgpr84
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $sgpr64 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr65 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr66 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr67 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr68 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr69 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr70 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr71 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr72 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr73 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr74 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr75 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr76 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr77 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr78 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr79 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr80 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr81 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr82 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr83 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr84 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr85 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr86 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr87 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr88 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr89 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr90 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr91 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr92 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr93 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr94 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   renamable $sgpr95 = COPY renamable $sgpr60
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $exec
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.11, implicit $exec
   ; CHECK-NEXT:   S_BRANCH %bb.5
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $sgpr60 = COPY killed renamable $sgpr33
+  ; CHECK-NEXT:   renamable $sgpr60 = COPY killed renamable $sgpr14
   ; CHECK-NEXT:   renamable $sgpr62 = COPY killed renamable $sgpr15
   ; CHECK-NEXT:   SI_SPILL_S32_SAVE killed renamable $sgpr16, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
@@ -145,124 +125,117 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.5:
   ; CHECK-NEXT:   successors: %bb.12(0x40000000), %bb.6(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $sgpr4_sgpr5 = S_AND_B64 renamable $sgpr20_sgpr21, undef renamable $sgpr88_sgpr89, implicit-def dead $scc
-  ; CHECK-NEXT:   renamable $sgpr88_sgpr89 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5
+  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = S_AND_B64 renamable $sgpr38_sgpr39, undef renamable $sgpr46_sgpr47, implicit-def dead $scc
+  ; CHECK-NEXT:   renamable $sgpr46_sgpr47 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term killed renamable $sgpr12_sgpr13
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.12, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.6:
   ; CHECK-NEXT:   successors: %bb.7(0x80000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr22_sgpr23, implicit $exec
+  ; CHECK-NEXT:   dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr40_sgpr41, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.7:
   ; CHECK-NEXT:   successors: %bb.8(0x80000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $sgpr90_sgpr91 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   renamable $sgpr92_sgpr93 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   dead [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr33, 11, implicit-def $m0, implicit $m0, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr48_sgpr49 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr50_sgpr51 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   dead [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr14, 11, implicit-def $m0, implicit $m0, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.8:
   ; CHECK-NEXT:   successors: %bb.10(0x40000000), %bb.9(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr90_sgpr91, $sgpr92_sgpr93, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, renamable $sgpr90_sgpr91, implicit-def dead $scc
+  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, renamable $sgpr48_sgpr49, implicit-def dead $scc
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.10, implicit $vcc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.9:
   ; CHECK-NEXT:   successors: %bb.10(0x40000000), %bb.17(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr90_sgpr91, $sgpr92_sgpr93, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr68_sgpr69, implicit $exec
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY renamable $sgpr60_sgpr61, implicit $exec
   ; CHECK-NEXT:   GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, [[COPY2]], undef renamable $sgpr4_sgpr5, 0, 0, implicit $exec :: (store (s64), addrspace 1)
-  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
-  ; CHECK-NEXT:   dead renamable $sgpr4_sgpr5 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec
-  ; CHECK-NEXT:   renamable $sgpr64 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr34_sgpr35, implicit $exec
+  ; CHECK-NEXT:   dead renamable $sgpr12_sgpr13 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr58 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY renamable $sgpr34_sgpr35
-  ; CHECK-NEXT:   renamable $sgpr42_sgpr43 = COPY killed renamable $sgpr6_sgpr7
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY renamable $sgpr42_sgpr43
-  ; CHECK-NEXT:   renamable $sgpr38_sgpr39 = COPY killed renamable $sgpr10_sgpr11
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY renamable $sgpr38_sgpr39
-  ; CHECK-NEXT:   renamable $sgpr44_sgpr45 = COPY killed renamable $sgpr12_sgpr13
-  ; CHECK-NEXT:   $sgpr12 = COPY renamable $sgpr33
+  ; CHECK-NEXT:   renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr4_sgpr5
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY renamable $sgpr52_sgpr53
+  ; CHECK-NEXT:   renamable $sgpr54_sgpr55 = COPY killed renamable $sgpr6_sgpr7
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY renamable $sgpr54_sgpr55
+  ; CHECK-NEXT:   renamable $sgpr56_sgpr57 = COPY killed renamable $sgpr10_sgpr11
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY renamable $sgpr56_sgpr57
+  ; CHECK-NEXT:   $sgpr12 = COPY renamable $sgpr14
   ; CHECK-NEXT:   $sgpr13 = COPY renamable $sgpr15
-  ; CHECK-NEXT:   renamable $sgpr36 = COPY killed renamable $sgpr16
-  ; CHECK-NEXT:   renamable $sgpr37 = COPY killed renamable $sgpr15
-  ; CHECK-NEXT:   renamable $sgpr40 = COPY killed renamable $sgpr8
-  ; CHECK-NEXT:   renamable $sgpr46_sgpr47 = COPY killed renamable $sgpr18_sgpr19
-  ; CHECK-NEXT:   renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr20_sgpr21
-  ; CHECK-NEXT:   renamable $sgpr50_sgpr51 = COPY killed renamable $sgpr22_sgpr23
-  ; CHECK-NEXT:   renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr24_sgpr25
-  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13
+  ; CHECK-NEXT:   renamable $sgpr62 = COPY killed renamable $sgpr8
+  ; CHECK-NEXT:   renamable $sgpr33 = COPY killed renamable $sgpr16
+  ; CHECK-NEXT:   renamable $sgpr59 = COPY killed renamable $sgpr15
+  ; CHECK-NEXT:   renamable $sgpr63 = COPY killed renamable $sgpr14
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
-  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY renamable $sgpr64_sgpr65
-  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9
-  ; CHECK-NEXT:   renamable $sgpr24_sgpr25 = COPY killed renamable $sgpr52_sgpr53
-  ; CHECK-NEXT:   renamable $sgpr22_sgpr23 = COPY killed renamable $sgpr50_sgpr51
-  ; CHECK-NEXT:   renamable $sgpr20_sgpr21 = COPY killed renamable $sgpr48_sgpr49
-  ; CHECK-NEXT:   renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr46_sgpr47
-  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr44_sgpr45
-  ; CHECK-NEXT:   renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr42_sgpr43
-  ; CHECK-NEXT:   renamable $sgpr8 = COPY killed renamable $sgpr40
-  ; CHECK-NEXT:   renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr38_sgpr39
-  ; CHECK-NEXT:   renamable $sgpr15 = COPY killed renamable $sgpr37
-  ; CHECK-NEXT:   renamable $sgpr16 = COPY killed renamable $sgpr36
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY renamable $sgpr58_sgpr59
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9
+  ; CHECK-NEXT:   renamable $sgpr14 = COPY killed renamable $sgpr63
+  ; CHECK-NEXT:   renamable $sgpr15 = COPY killed renamable $sgpr59
+  ; CHECK-NEXT:   renamable $sgpr16 = COPY killed renamable $sgpr33
+  ; CHECK-NEXT:   renamable $sgpr4_sgpr5 = COPY killed renamable $sgpr52_sgpr53
+  ; CHECK-NEXT:   renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr54_sgpr55
+  ; CHECK-NEXT:   renamable $sgpr8 = COPY killed renamable $sgpr62
+  ; CHECK-NEXT:   renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr56_sgpr57
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
-  ; CHECK-NEXT:   $exec = S_MOV_B64_term renamable $sgpr92_sgpr93
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term renamable $sgpr50_sgpr51
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.10, implicit $exec
   ; CHECK-NEXT:   S_BRANCH %bb.17
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.10:
   ; CHECK-NEXT:   successors: %bb.8(0x40000000), %bb.12(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr90_sgpr91, $sgpr92_sgpr93, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.8, implicit $exec
   ; CHECK-NEXT:   S_BRANCH %bb.12
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.11:
   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.17(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
   ; CHECK-NEXT:   S_BRANCH %bb.17
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.12:
   ; CHECK-NEXT:   successors: %bb.11(0x40000000), %bb.13(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $exec = S_MOV_B64_term killed renamable $sgpr88_sgpr89
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term killed renamable $sgpr46_sgpr47
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.11, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.13:
   ; CHECK-NEXT:   successors: %bb.15(0x40000000), %bb.14(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, renamable $sgpr24_sgpr25, implicit-def dead $scc
+  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, renamable $sgpr42_sgpr43, implicit-def dead $scc
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.15, implicit $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.14
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.14:
   ; CHECK-NEXT:   successors: %bb.15(0x80000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.15:
   ; CHECK-NEXT:   successors: %bb.11(0x40000000), %bb.16(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, renamable $sgpr18_sgpr19, implicit-def dead $scc
+  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, renamable $sgpr36_sgpr37, implicit-def dead $scc
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.11, implicit $vcc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.16:
   ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.17(0x40000000)
-  ; CHECK-NEXT:   liveins: $sgpr15, $sgpr16, $sgpr33
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
   ; CHECK-NEXT: {{  $}}

diff  --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
index 9ca20fab2012155..2f82260888a7da0 100644
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -30,53 +30,53 @@ define hidden void @widget() {
 ; GCN-NEXT:    v_writelane_b32 v40, s45, 13
 ; GCN-NEXT:    v_writelane_b32 v40, s46, 14
 ; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_mov_b32_e32 v41, v31
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NEXT:    flat_load_dword v0, v[0:1]
-; GCN-NEXT:    s_mov_b64 s[4:5], 0
-; GCN-NEXT:    s_mov_b64 s[8:9], -1
+; GCN-NEXT:    s_mov_b64 s[16:17], 0
+; GCN-NEXT:    s_mov_b64 s[20:21], -1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_cmp_gt_i32_e32 vcc, 21, v0
 ; GCN-NEXT:    s_mov_b64 s[46:47], 0
-; GCN-NEXT:    s_mov_b64 s[6:7], 0
+; GCN-NEXT:    s_mov_b64 s[18:19], 0
 ; GCN-NEXT:    s_cbranch_vccz .LBB0_9
 ; GCN-NEXT:  ; %bb.1: ; %Flow
-; GCN-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
+; GCN-NEXT:    s_andn2_b64 vcc, exec, s[20:21]
 ; GCN-NEXT:    s_cbranch_vccz .LBB0_10
 ; GCN-NEXT:  .LBB0_2: ; %Flow1
-; GCN-NEXT:    s_andn2_b64 vcc, exec, s[6:7]
+; GCN-NEXT:    s_andn2_b64 vcc, exec, s[18:19]
 ; GCN-NEXT:    s_cbranch_vccnz .LBB0_4
 ; GCN-NEXT:  .LBB0_3: ; %bb9
 ; GCN-NEXT:    s_getpc_b64 s[16:17]
 ; GCN-NEXT:    s_add_u32 s16, s16, wibble at rel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s17, s17, wibble at rel32@hi+12
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    v_mov_b32_e32 v31, v41
+; GCN-NEXT:    s_mov_b64 s[34:35], s[4:5]
+; GCN-NEXT:    s_mov_b64 s[36:37], s[6:7]
+; GCN-NEXT:    s_mov_b64 s[38:39], s[8:9]
+; GCN-NEXT:    s_mov_b64 s[40:41], s[10:11]
+; GCN-NEXT:    s_mov_b32 s42, s12
+; GCN-NEXT:    s_mov_b32 s43, s13
+; GCN-NEXT:    s_mov_b32 s44, s14
+; GCN-NEXT:    s_mov_b32 s45, s15
+; GCN-NEXT:    v_mov_b32_e32 v41, v31
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN-NEXT:    v_mov_b32_e32 v31, v41
+; GCN-NEXT:    s_mov_b32 s12, s42
+; GCN-NEXT:    s_mov_b32 s13, s43
+; GCN-NEXT:    s_mov_b32 s14, s44
+; GCN-NEXT:    s_mov_b32 s15, s45
+; GCN-NEXT:    s_mov_b64 s[4:5], s[34:35]
+; GCN-NEXT:    s_mov_b64 s[6:7], s[36:37]
+; GCN-NEXT:    s_mov_b64 s[8:9], s[38:39]
+; GCN-NEXT:    s_mov_b64 s[10:11], s[40:41]
 ; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; GCN-NEXT:    s_mov_b64 s[4:5], 0
-; GCN-NEXT:    s_andn2_b64 s[6:7], s[46:47], exec
-; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
-; GCN-NEXT:    s_or_b64 s[46:47], s[6:7], s[8:9]
+; GCN-NEXT:    s_mov_b64 s[16:17], 0
+; GCN-NEXT:    s_andn2_b64 s[18:19], s[46:47], exec
+; GCN-NEXT:    s_and_b64 s[20:21], vcc, exec
+; GCN-NEXT:    s_or_b64 s[46:47], s[18:19], s[20:21]
 ; GCN-NEXT:  .LBB0_4: ; %Flow2
-; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[46:47]
-; GCN-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; GCN-NEXT:    s_and_saveexec_b64 s[18:19], s[46:47]
+; GCN-NEXT:    s_xor_b64 s[18:19], exec, s[18:19]
 ; GCN-NEXT:    s_cbranch_execz .LBB0_6
 ; GCN-NEXT:  ; %bb.5: ; %bb12
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
@@ -84,22 +84,13 @@ define hidden void @widget() {
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NEXT:    flat_store_dword v[0:1], v2
 ; GCN-NEXT:  .LBB0_6: ; %Flow3
-; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GCN-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
+; GCN-NEXT:    s_or_b64 exec, exec, s[18:19]
+; GCN-NEXT:    s_andn2_b64 vcc, exec, s[16:17]
 ; GCN-NEXT:    s_cbranch_vccnz .LBB0_8
 ; GCN-NEXT:  ; %bb.7: ; %bb7
 ; GCN-NEXT:    s_getpc_b64 s[16:17]
 ; GCN-NEXT:    s_add_u32 s16, s16, wibble at rel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s17, s17, wibble at rel32@hi+12
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    v_mov_b32_e32 v31, v41
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GCN-NEXT:  .LBB0_8: ; %UnifiedReturnBlock
 ; GCN-NEXT:    v_readlane_b32 s47, v40, 15
@@ -129,13 +120,13 @@ define hidden void @widget() {
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ; GCN-NEXT:  .LBB0_9: ; %bb2
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[46:47], 21, v0
-; GCN-NEXT:    v_cmp_ne_u32_e64 s[6:7], 21, v0
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[18:19], 21, v0
 ; GCN-NEXT:    s_mov_b64 vcc, exec
 ; GCN-NEXT:    s_cbranch_execnz .LBB0_2
 ; GCN-NEXT:  .LBB0_10: ; %bb4
-; GCN-NEXT:    s_mov_b64 s[4:5], -1
-; GCN-NEXT:    v_cmp_ne_u32_e64 s[6:7], 9, v0
-; GCN-NEXT:    s_andn2_b64 vcc, exec, s[6:7]
+; GCN-NEXT:    s_mov_b64 s[16:17], -1
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[18:19], 9, v0
+; GCN-NEXT:    s_andn2_b64 vcc, exec, s[18:19]
 ; GCN-NEXT:    s_cbranch_vccz .LBB0_3
 ; GCN-NEXT:    s_branch .LBB0_4
 ; SI-OPT-LABEL: @widget(

diff  --git a/llvm/test/CodeGen/ARM/csr-split.ll b/llvm/test/CodeGen/ARM/csr-split.ll
index 8db2a14041c58d4..96c207fe148b44b 100644
--- a/llvm/test/CodeGen/ARM/csr-split.ll
+++ b/llvm/test/CodeGen/ARM/csr-split.ll
@@ -8,14 +8,13 @@
 define dso_local signext i32 @test1(ptr %b) local_unnamed_addr  {
 ; CHECK-LABEL: test1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    push {r4, lr}
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    ldr r0, .LCPI0_0
-; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    cmp r0, r4
-; CHECK-NEXT:    popne {r4, lr}
+; CHECK-NEXT:    ldr r1, .LCPI0_0
+; CHECK-NEXT:    ldr r1, [r1]
+; CHECK-NEXT:    cmp r1, r0
 ; CHECK-NEXT:    movne pc, lr
 ; CHECK-NEXT:  .LBB0_1: @ %if.then
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    bl callVoid
 ; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    pop {r4, lr}
@@ -48,20 +47,19 @@ declare signext i32 @callNonVoid(ptr) local_unnamed_addr
 define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr  {
 ; CHECK-LABEL: test2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    beq .LBB1_2
 ; CHECK-NEXT:  @ %bb.1: @ %if.end
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    ldr r0, .LCPI1_0
-; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    cmp r0, r4
+; CHECK-NEXT:    ldr r1, .LCPI1_0
+; CHECK-NEXT:    ldr r1, [r1]
+; CHECK-NEXT:    cmp r1, r0
 ; CHECK-NEXT:    beq .LBB1_3
 ; CHECK-NEXT:  .LBB1_2: @ %return
 ; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    pop {r4, lr}
 ; CHECK-NEXT:    mov pc, lr
 ; CHECK-NEXT:  .LBB1_3: @ %if.then2
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    bl callVoid
 ; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    pop {r4, lr}

diff  --git a/llvm/test/CodeGen/ARM/divmod-eabi.ll b/llvm/test/CodeGen/ARM/divmod-eabi.ll
index 77ffc46e6a6914b..a7bfed7290e2a26 100644
--- a/llvm/test/CodeGen/ARM/divmod-eabi.ll
+++ b/llvm/test/CodeGen/ARM/divmod-eabi.ll
@@ -218,6 +218,7 @@ entry:
 ; DARWIN: mov [[sum:r[0-9]+]], r0
 ; DARWIN-O0: __modsi3
 ; WINDOWS: __rt_sdiv
+; WINDOWS: mov [[arg:r[0-9]+]], r1
 ; WINDOWS: mov [[rem:r[0-9]+]], r1
   %rem1 = srem i32 %b, %rem
 ; EABI: __aeabi_idivmod

diff  --git a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
index f9f29fc064a20ce..8cf7a702e8ed54d 100644
--- a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
+++ b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
@@ -85,8 +85,8 @@ entry:
 
 define i32 @bundled_instruction(ptr %addr, ptr %addr2, i1 %tst) minsize {
 ; CHECK-LABEL: bundled_instruction:
-; CHECK: itee ne
-; CHECK: ldmeq r3!, {{{r[0-9]+}}}
+; CHECK: iteee ne
+; CHECK: ldmeq r2!, {{{r[0-9]+}}}
   br i1 %tst, label %true, label %false
 
 true:

diff  --git a/llvm/test/CodeGen/AVR/cttz.ll b/llvm/test/CodeGen/AVR/cttz.ll
index b68994f3ab1d744..b1ef34acf631586 100644
--- a/llvm/test/CodeGen/AVR/cttz.ll
+++ b/llvm/test/CodeGen/AVR/cttz.ll
@@ -28,10 +28,9 @@ declare i8 @llvm.cttz.i8(i8)
 ; CHECK: mov    {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
 ; CHECK: swap   {{.*}}[[SCRATCH]]
 ; CHECK: add    {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
-; CHECK: andi   {{.*}}[[SCRATCH]], 15
 ; CHECK: mov    {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
+; CHECK: andi   {{.*}}[[RESULT]], 15
 ; CHECK: ret
 ; CHECK: [[END_BB]]:
-; CHECK: ldi    {{.*}}[[SCRATCH]], 8
-; CHECK: mov    {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
+; CHECK: ldi    {{.*}}[[RESULT]], 8
 ; CHECK: ret

diff  --git a/llvm/test/CodeGen/Hexagon/noreturn-noepilog.ll b/llvm/test/CodeGen/Hexagon/noreturn-noepilog.ll
index dd877c821bae008..14d5ca628d83c19 100644
--- a/llvm/test/CodeGen/Hexagon/noreturn-noepilog.ll
+++ b/llvm/test/CodeGen/Hexagon/noreturn-noepilog.ll
@@ -1,12 +1,10 @@
 ; RUN: llc -march=hexagon < %s | FileCheck %s
 ;
-; XFAIL: *
-; This test is failing after post-ra machine sinking.
 ;
 ; Check that no epilogue is inserted after a noreturn call.
 ;
 ; CHECK-LABEL: f1:
-; CHECK: allocframe(r29,#0):raw
+; CHECK: allocframe
 ; CHECK-NOT: deallocframe
 
 target triple = "hexagon"

diff  --git a/llvm/test/CodeGen/PowerPC/csr-split.ll b/llvm/test/CodeGen/PowerPC/csr-split.ll
index dea07f3c574203e..e24c7e3eaab6b9f 100644
--- a/llvm/test/CodeGen/PowerPC/csr-split.ll
+++ b/llvm/test/CodeGen/PowerPC/csr-split.ll
@@ -11,35 +11,47 @@
 define dso_local signext i32 @test1(ptr %b) local_unnamed_addr  {
 ; CHECK-PWR9-LABEL: test1:
 ; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    addis r4, r2, a at toc@ha
+; CHECK-PWR9-NEXT:    lwa r4, a at toc@l(r4)
+; CHECK-PWR9-NEXT:    cmpld r4, r3
+; CHECK-PWR9-NEXT:    # implicit-def: $r4
+; CHECK-PWR9-NEXT:    beq cr0, .LBB0_2
+; CHECK-PWR9-NEXT:  # %bb.1: # %if.end
+; CHECK-PWR9-NEXT:    extsw r3, r4
+; CHECK-PWR9-NEXT:    blr
+; CHECK-PWR9-NEXT:  .LBB0_2: # %if.then
 ; CHECK-PWR9-NEXT:    mflr r0
 ; CHECK-PWR9-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-PWR9-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR9-NEXT:    .cfi_offset r30, -16
 ; CHECK-PWR9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    stdu r1, -48(r1)
-; CHECK-PWR9-NEXT:    mr r30, r3
-; CHECK-PWR9-NEXT:    addis r3, r2, a at toc@ha
 ; CHECK-PWR9-NEXT:    std r0, 64(r1)
-; CHECK-PWR9-NEXT:    lwa r3, a at toc@l(r3)
-; CHECK-PWR9-NEXT:    cmpld r3, r30
-; CHECK-PWR9-NEXT:    # implicit-def: $r3
-; CHECK-PWR9-NEXT:    bne cr0, .LBB0_2
-; CHECK-PWR9-NEXT:  # %bb.1: # %if.then
+; CHECK-PWR9-NEXT:    mr r30, r3
 ; CHECK-PWR9-NEXT:    bl callVoid
 ; CHECK-PWR9-NEXT:    nop
 ; CHECK-PWR9-NEXT:    mr r3, r30
 ; CHECK-PWR9-NEXT:    bl callNonVoid
 ; CHECK-PWR9-NEXT:    nop
-; CHECK-PWR9-NEXT:  .LBB0_2: # %if.end
-; CHECK-PWR9-NEXT:    extsw r3, r3
+; CHECK-PWR9-NEXT:    mr r4, r3
 ; CHECK-PWR9-NEXT:    addi r1, r1, 48
 ; CHECK-PWR9-NEXT:    ld r0, 16(r1)
 ; CHECK-PWR9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    mtlr r0
+; CHECK-PWR9-NEXT:    extsw r3, r4
 ; CHECK-PWR9-NEXT:    blr
 ;
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r4, r2, a at toc@ha
+; CHECK-NEXT:    lwa r4, a at toc@l(r4)
+; CHECK-NEXT:    cmpld r4, r3
+; CHECK-NEXT:    # implicit-def: $r4
+; CHECK-NEXT:    beq cr0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %if.end
+; CHECK-NEXT:    extsw r3, r4
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB0_2: # %if.then
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -128(r1)
 ; CHECK-NEXT:    std r0, 144(r1)
@@ -48,23 +60,17 @@ define dso_local signext i32 @test1(ptr %b) local_unnamed_addr  {
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    addis r3, r2, a at toc@ha
-; CHECK-NEXT:    lwa r3, a at toc@l(r3)
-; CHECK-NEXT:    cmpld r3, r30
-; CHECK-NEXT:    # implicit-def: $r3
-; CHECK-NEXT:    bne cr0, .LBB0_2
-; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    bl callVoid
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mr r3, r30
 ; CHECK-NEXT:    bl callNonVoid
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:  .LBB0_2: # %if.end
 ; CHECK-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    mr r4, r3
 ; CHECK-NEXT:    addi r1, r1, 128
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    extsw r3, r4
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i32, ptr @a, align 4, !tbaa !2

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
index bd528731d239483..8955835f41ea6c6 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
@@ -1927,11 +1927,12 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P10-NEXT:    std r0, 16(r1)
 ; BE-P10-NEXT:    hashst r0, -16(r1)
 ; BE-P10-NEXT:    stdu r1, -144(r1)
+; BE-P10-NEXT:    lwz r4, 12(r3)
 ; BE-P10-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    mr r31, r3
-; BE-P10-NEXT:    lwz r3, 12(r3)
-; BE-P10-NEXT:    stw r3, 124(r1)
-; BE-P10-NEXT:    addi r3, r1, 124
+; BE-P10-NEXT:    stw r4, 124(r1)
+; BE-P10-NEXT:    addi r4, r1, 124
+; BE-P10-NEXT:    mr r3, r4
 ; BE-P10-NEXT:    bl .callee2[PR]
 ; BE-P10-NEXT:    nop
 ; BE-P10-NEXT:    lwz r4, 16(r31)
@@ -1958,9 +1959,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P9-NEXT:    hashst r0, -16(r1)
 ; BE-P9-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    mr r31, r3
-; BE-P9-NEXT:    lwz r3, 12(r3)
-; BE-P9-NEXT:    stw r3, 124(r1)
-; BE-P9-NEXT:    addi r3, r1, 124
+; BE-P9-NEXT:    lwz r4, 12(r3)
+; BE-P9-NEXT:    stw r4, 124(r1)
+; BE-P9-NEXT:    addi r4, r1, 124
+; BE-P9-NEXT:    mr r3, r4
 ; BE-P9-NEXT:    bl .callee2[PR]
 ; BE-P9-NEXT:    nop
 ; BE-P9-NEXT:    lwz r4, 16(r31)
@@ -1987,9 +1989,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P8-NEXT:    hashst r0, -16(r1)
 ; BE-P8-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    mr r31, r3
-; BE-P8-NEXT:    lwz r3, 12(r3)
-; BE-P8-NEXT:    stw r3, 124(r1)
-; BE-P8-NEXT:    addi r3, r1, 124
+; BE-P8-NEXT:    lwz r4, 12(r3)
+; BE-P8-NEXT:    stw r4, 124(r1)
+; BE-P8-NEXT:    addi r4, r1, 124
+; BE-P8-NEXT:    mr r3, r4
 ; BE-P8-NEXT:    bl .callee2[PR]
 ; BE-P8-NEXT:    nop
 ; BE-P8-NEXT:    lwz r4, 16(r31)
@@ -2014,11 +2017,12 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P10-NEXT:    stw r0, 8(r1)
 ; BE-32BIT-P10-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P10-NEXT:    stwu r1, -80(r1)
+; BE-32BIT-P10-NEXT:    lwz r4, 12(r3)
 ; BE-32BIT-P10-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P10-NEXT:    mr r31, r3
-; BE-32BIT-P10-NEXT:    lwz r3, 12(r3)
-; BE-32BIT-P10-NEXT:    stw r3, 60(r1)
-; BE-32BIT-P10-NEXT:    addi r3, r1, 60
+; BE-32BIT-P10-NEXT:    stw r4, 60(r1)
+; BE-32BIT-P10-NEXT:    addi r4, r1, 60
+; BE-32BIT-P10-NEXT:    mr r3, r4
 ; BE-32BIT-P10-NEXT:    bl .callee2[PR]
 ; BE-32BIT-P10-NEXT:    nop
 ; BE-32BIT-P10-NEXT:    lwz r4, 16(r31)
@@ -2044,9 +2048,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P9-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P9-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    mr r31, r3
-; BE-32BIT-P9-NEXT:    lwz r3, 12(r3)
-; BE-32BIT-P9-NEXT:    stw r3, 60(r1)
-; BE-32BIT-P9-NEXT:    addi r3, r1, 60
+; BE-32BIT-P9-NEXT:    lwz r4, 12(r3)
+; BE-32BIT-P9-NEXT:    stw r4, 60(r1)
+; BE-32BIT-P9-NEXT:    addi r4, r1, 60
+; BE-32BIT-P9-NEXT:    mr r3, r4
 ; BE-32BIT-P9-NEXT:    bl .callee2[PR]
 ; BE-32BIT-P9-NEXT:    nop
 ; BE-32BIT-P9-NEXT:    lwz r4, 16(r31)
@@ -2072,9 +2077,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    mr r31, r3
-; BE-32BIT-P8-NEXT:    lwz r3, 12(r3)
-; BE-32BIT-P8-NEXT:    stw r3, 60(r1)
-; BE-32BIT-P8-NEXT:    addi r3, r1, 60
+; BE-32BIT-P8-NEXT:    lwz r4, 12(r3)
+; BE-32BIT-P8-NEXT:    stw r4, 60(r1)
+; BE-32BIT-P8-NEXT:    addi r4, r1, 60
+; BE-32BIT-P8-NEXT:    mr r3, r4
 ; BE-32BIT-P8-NEXT:    bl .callee2[PR]
 ; BE-32BIT-P8-NEXT:    nop
 ; BE-32BIT-P8-NEXT:    lwz r4, 16(r31)
@@ -2098,11 +2104,12 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P10-PRIV-NEXT:    std r0, 16(r1)
 ; BE-P10-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P10-PRIV-NEXT:    stdu r1, -144(r1)
+; BE-P10-PRIV-NEXT:    lwz r4, 12(r3)
 ; BE-P10-PRIV-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    mr r31, r3
-; BE-P10-PRIV-NEXT:    lwz r3, 12(r3)
-; BE-P10-PRIV-NEXT:    stw r3, 124(r1)
-; BE-P10-PRIV-NEXT:    addi r3, r1, 124
+; BE-P10-PRIV-NEXT:    stw r4, 124(r1)
+; BE-P10-PRIV-NEXT:    addi r4, r1, 124
+; BE-P10-PRIV-NEXT:    mr r3, r4
 ; BE-P10-PRIV-NEXT:    bl .callee2[PR]
 ; BE-P10-PRIV-NEXT:    nop
 ; BE-P10-PRIV-NEXT:    lwz r4, 16(r31)
@@ -2129,9 +2136,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P9-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P9-PRIV-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    mr r31, r3
-; BE-P9-PRIV-NEXT:    lwz r3, 12(r3)
-; BE-P9-PRIV-NEXT:    stw r3, 124(r1)
-; BE-P9-PRIV-NEXT:    addi r3, r1, 124
+; BE-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-P9-PRIV-NEXT:    stw r4, 124(r1)
+; BE-P9-PRIV-NEXT:    addi r4, r1, 124
+; BE-P9-PRIV-NEXT:    mr r3, r4
 ; BE-P9-PRIV-NEXT:    bl .callee2[PR]
 ; BE-P9-PRIV-NEXT:    nop
 ; BE-P9-PRIV-NEXT:    lwz r4, 16(r31)
@@ -2158,9 +2166,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P8-PRIV-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    mr r31, r3
-; BE-P8-PRIV-NEXT:    lwz r3, 12(r3)
-; BE-P8-PRIV-NEXT:    stw r3, 124(r1)
-; BE-P8-PRIV-NEXT:    addi r3, r1, 124
+; BE-P8-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-P8-PRIV-NEXT:    stw r4, 124(r1)
+; BE-P8-PRIV-NEXT:    addi r4, r1, 124
+; BE-P8-PRIV-NEXT:    mr r3, r4
 ; BE-P8-PRIV-NEXT:    bl .callee2[PR]
 ; BE-P8-PRIV-NEXT:    nop
 ; BE-P8-PRIV-NEXT:    lwz r4, 16(r31)
@@ -2185,11 +2194,12 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P10-PRIV-NEXT:    stw r0, 8(r1)
 ; BE-32BIT-P10-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P10-PRIV-NEXT:    stwu r1, -80(r1)
+; BE-32BIT-P10-PRIV-NEXT:    lwz r4, 12(r3)
 ; BE-32BIT-P10-PRIV-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P10-PRIV-NEXT:    mr r31, r3
-; BE-32BIT-P10-PRIV-NEXT:    lwz r3, 12(r3)
-; BE-32BIT-P10-PRIV-NEXT:    stw r3, 60(r1)
-; BE-32BIT-P10-PRIV-NEXT:    addi r3, r1, 60
+; BE-32BIT-P10-PRIV-NEXT:    stw r4, 60(r1)
+; BE-32BIT-P10-PRIV-NEXT:    addi r4, r1, 60
+; BE-32BIT-P10-PRIV-NEXT:    mr r3, r4
 ; BE-32BIT-P10-PRIV-NEXT:    bl .callee2[PR]
 ; BE-32BIT-P10-PRIV-NEXT:    nop
 ; BE-32BIT-P10-PRIV-NEXT:    lwz r4, 16(r31)
@@ -2215,9 +2225,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P9-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P9-PRIV-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    mr r31, r3
-; BE-32BIT-P9-PRIV-NEXT:    lwz r3, 12(r3)
-; BE-32BIT-P9-PRIV-NEXT:    stw r3, 60(r1)
-; BE-32BIT-P9-PRIV-NEXT:    addi r3, r1, 60
+; BE-32BIT-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-32BIT-P9-PRIV-NEXT:    stw r4, 60(r1)
+; BE-32BIT-P9-PRIV-NEXT:    addi r4, r1, 60
+; BE-32BIT-P9-PRIV-NEXT:    mr r3, r4
 ; BE-32BIT-P9-PRIV-NEXT:    bl .callee2[PR]
 ; BE-32BIT-P9-PRIV-NEXT:    nop
 ; BE-32BIT-P9-PRIV-NEXT:    lwz r4, 16(r31)
@@ -2243,9 +2254,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P8-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    mr r31, r3
-; BE-32BIT-P8-PRIV-NEXT:    lwz r3, 12(r3)
-; BE-32BIT-P8-PRIV-NEXT:    stw r3, 60(r1)
-; BE-32BIT-P8-PRIV-NEXT:    addi r3, r1, 60
+; BE-32BIT-P8-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-32BIT-P8-PRIV-NEXT:    stw r4, 60(r1)
+; BE-32BIT-P8-PRIV-NEXT:    addi r4, r1, 60
+; BE-32BIT-P8-PRIV-NEXT:    mr r3, r4
 ; BE-32BIT-P8-PRIV-NEXT:    bl .callee2[PR]
 ; BE-32BIT-P8-PRIV-NEXT:    nop
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r4, 16(r31)

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
index 1cfa811807baf73..1ad1483bd81a833 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
@@ -2864,10 +2864,11 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P10-NEXT:    std r0, 16(r1)
 ; LE-P10-NEXT:    hashst r0, -24(r1)
 ; LE-P10-NEXT:    stdu r1, -64(r1)
+; LE-P10-NEXT:    lwz r4, 12(r3)
 ; LE-P10-NEXT:    mr r30, r3
-; LE-P10-NEXT:    lwz r3, 12(r3)
-; LE-P10-NEXT:    stw r3, 36(r1)
-; LE-P10-NEXT:    addi r3, r1, 36
+; LE-P10-NEXT:    stw r4, 36(r1)
+; LE-P10-NEXT:    addi r4, r1, 36
+; LE-P10-NEXT:    mr r3, r4
 ; LE-P10-NEXT:    bl callee2 at notoc
 ; LE-P10-NEXT:    lwz r4, 16(r30)
 ; LE-P10-NEXT:    add r3, r4, r3
@@ -2893,9 +2894,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P9-NEXT:    std r0, 80(r1)
 ; LE-P9-NEXT:    hashst r0, -24(r1)
 ; LE-P9-NEXT:    mr r30, r3
-; LE-P9-NEXT:    lwz r3, 12(r3)
-; LE-P9-NEXT:    stw r3, 36(r1)
-; LE-P9-NEXT:    addi r3, r1, 36
+; LE-P9-NEXT:    lwz r4, 12(r3)
+; LE-P9-NEXT:    stw r4, 36(r1)
+; LE-P9-NEXT:    addi r4, r1, 36
+; LE-P9-NEXT:    mr r3, r4
 ; LE-P9-NEXT:    bl callee2
 ; LE-P9-NEXT:    nop
 ; LE-P9-NEXT:    lwz r4, 16(r30)
@@ -2922,9 +2924,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P8-NEXT:    std r0, 80(r1)
 ; LE-P8-NEXT:    hashst r0, -24(r1)
 ; LE-P8-NEXT:    mr r30, r3
-; LE-P8-NEXT:    lwz r3, 12(r3)
-; LE-P8-NEXT:    stw r3, 36(r1)
-; LE-P8-NEXT:    addi r3, r1, 36
+; LE-P8-NEXT:    lwz r4, 12(r3)
+; LE-P8-NEXT:    stw r4, 36(r1)
+; LE-P8-NEXT:    addi r4, r1, 36
+; LE-P8-NEXT:    mr r3, r4
 ; LE-P8-NEXT:    bl callee2
 ; LE-P8-NEXT:    nop
 ; LE-P8-NEXT:    lwz r4, 16(r30)
@@ -3045,11 +3048,12 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P10-NEXT:    std r0, 16(r1)
 ; BE-P10-NEXT:    hashst r0, -24(r1)
 ; BE-P10-NEXT:    stdu r1, -144(r1)
+; BE-P10-NEXT:    lwz r4, 12(r3)
 ; BE-P10-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    mr r30, r3
-; BE-P10-NEXT:    lwz r3, 12(r3)
-; BE-P10-NEXT:    stw r3, 116(r1)
-; BE-P10-NEXT:    addi r3, r1, 116
+; BE-P10-NEXT:    stw r4, 116(r1)
+; BE-P10-NEXT:    addi r4, r1, 116
+; BE-P10-NEXT:    mr r3, r4
 ; BE-P10-NEXT:    bl callee2
 ; BE-P10-NEXT:    nop
 ; BE-P10-NEXT:    lwz r4, 16(r30)
@@ -3076,9 +3080,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P9-NEXT:    hashst r0, -24(r1)
 ; BE-P9-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    mr r30, r3
-; BE-P9-NEXT:    lwz r3, 12(r3)
-; BE-P9-NEXT:    stw r3, 116(r1)
-; BE-P9-NEXT:    addi r3, r1, 116
+; BE-P9-NEXT:    lwz r4, 12(r3)
+; BE-P9-NEXT:    stw r4, 116(r1)
+; BE-P9-NEXT:    addi r4, r1, 116
+; BE-P9-NEXT:    mr r3, r4
 ; BE-P9-NEXT:    bl callee2
 ; BE-P9-NEXT:    nop
 ; BE-P9-NEXT:    lwz r4, 16(r30)
@@ -3105,9 +3110,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P8-NEXT:    hashst r0, -24(r1)
 ; BE-P8-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    mr r30, r3
-; BE-P8-NEXT:    lwz r3, 12(r3)
-; BE-P8-NEXT:    stw r3, 116(r1)
-; BE-P8-NEXT:    addi r3, r1, 116
+; BE-P8-NEXT:    lwz r4, 12(r3)
+; BE-P8-NEXT:    stw r4, 116(r1)
+; BE-P8-NEXT:    addi r4, r1, 116
+; BE-P8-NEXT:    mr r3, r4
 ; BE-P8-NEXT:    bl callee2
 ; BE-P8-NEXT:    nop
 ; BE-P8-NEXT:    lwz r4, 16(r30)
@@ -3133,10 +3139,11 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P10-NEXT:    stw r30, 24(r1) # 4-byte Folded Spill
 ; BE-32BIT-P10-NEXT:    beq cr0, .LBB2_2
 ; BE-32BIT-P10-NEXT:  # %bb.1: # %if.end
+; BE-32BIT-P10-NEXT:    lwz r4, 12(r3)
 ; BE-32BIT-P10-NEXT:    mr r30, r3
-; BE-32BIT-P10-NEXT:    lwz r3, 12(r3)
-; BE-32BIT-P10-NEXT:    stw r3, 12(r1)
-; BE-32BIT-P10-NEXT:    addi r3, r1, 12
+; BE-32BIT-P10-NEXT:    stw r4, 12(r1)
+; BE-32BIT-P10-NEXT:    addi r4, r1, 12
+; BE-32BIT-P10-NEXT:    mr r3, r4
 ; BE-32BIT-P10-NEXT:    bl callee2
 ; BE-32BIT-P10-NEXT:    lwz r4, 16(r30)
 ; BE-32BIT-P10-NEXT:    add r3, r4, r3
@@ -3161,10 +3168,11 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P9-NEXT:    stw r30, 24(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    beq cr0, .LBB2_2
 ; BE-32BIT-P9-NEXT:  # %bb.1: # %if.end
+; BE-32BIT-P9-NEXT:    lwz r4, 12(r3)
 ; BE-32BIT-P9-NEXT:    mr r30, r3
-; BE-32BIT-P9-NEXT:    lwz r3, 12(r3)
-; BE-32BIT-P9-NEXT:    stw r3, 12(r1)
-; BE-32BIT-P9-NEXT:    addi r3, r1, 12
+; BE-32BIT-P9-NEXT:    stw r4, 12(r1)
+; BE-32BIT-P9-NEXT:    addi r4, r1, 12
+; BE-32BIT-P9-NEXT:    mr r3, r4
 ; BE-32BIT-P9-NEXT:    bl callee2
 ; BE-32BIT-P9-NEXT:    lwz r4, 16(r30)
 ; BE-32BIT-P9-NEXT:    add r3, r4, r3
@@ -3189,10 +3197,11 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    stw r30, 24(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    beq cr0, .LBB2_2
 ; BE-32BIT-P8-NEXT:  # %bb.1: # %if.end
+; BE-32BIT-P8-NEXT:    lwz r4, 12(r3)
 ; BE-32BIT-P8-NEXT:    mr r30, r3
-; BE-32BIT-P8-NEXT:    lwz r3, 12(r3)
-; BE-32BIT-P8-NEXT:    stw r3, 12(r1)
-; BE-32BIT-P8-NEXT:    addi r3, r1, 12
+; BE-32BIT-P8-NEXT:    stw r4, 12(r1)
+; BE-32BIT-P8-NEXT:    addi r4, r1, 12
+; BE-32BIT-P8-NEXT:    mr r3, r4
 ; BE-32BIT-P8-NEXT:    bl callee2
 ; BE-32BIT-P8-NEXT:    lwz r4, 16(r30)
 ; BE-32BIT-P8-NEXT:    add r3, r4, r3
@@ -3217,10 +3226,11 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P10-PRIV-NEXT:    std r0, 16(r1)
 ; LE-P10-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P10-PRIV-NEXT:    stdu r1, -64(r1)
+; LE-P10-PRIV-NEXT:    lwz r4, 12(r3)
 ; LE-P10-PRIV-NEXT:    mr r30, r3
-; LE-P10-PRIV-NEXT:    lwz r3, 12(r3)
-; LE-P10-PRIV-NEXT:    stw r3, 36(r1)
-; LE-P10-PRIV-NEXT:    addi r3, r1, 36
+; LE-P10-PRIV-NEXT:    stw r4, 36(r1)
+; LE-P10-PRIV-NEXT:    addi r4, r1, 36
+; LE-P10-PRIV-NEXT:    mr r3, r4
 ; LE-P10-PRIV-NEXT:    bl callee2 at notoc
 ; LE-P10-PRIV-NEXT:    lwz r4, 16(r30)
 ; LE-P10-PRIV-NEXT:    add r3, r4, r3
@@ -3246,9 +3256,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P9-PRIV-NEXT:    std r0, 80(r1)
 ; LE-P9-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P9-PRIV-NEXT:    mr r30, r3
-; LE-P9-PRIV-NEXT:    lwz r3, 12(r3)
-; LE-P9-PRIV-NEXT:    stw r3, 36(r1)
-; LE-P9-PRIV-NEXT:    addi r3, r1, 36
+; LE-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; LE-P9-PRIV-NEXT:    stw r4, 36(r1)
+; LE-P9-PRIV-NEXT:    addi r4, r1, 36
+; LE-P9-PRIV-NEXT:    mr r3, r4
 ; LE-P9-PRIV-NEXT:    bl callee2
 ; LE-P9-PRIV-NEXT:    nop
 ; LE-P9-PRIV-NEXT:    lwz r4, 16(r30)
@@ -3275,9 +3286,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P8-PRIV-NEXT:    std r0, 80(r1)
 ; LE-P8-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P8-PRIV-NEXT:    mr r30, r3
-; LE-P8-PRIV-NEXT:    lwz r3, 12(r3)
-; LE-P8-PRIV-NEXT:    stw r3, 36(r1)
-; LE-P8-PRIV-NEXT:    addi r3, r1, 36
+; LE-P8-PRIV-NEXT:    lwz r4, 12(r3)
+; LE-P8-PRIV-NEXT:    stw r4, 36(r1)
+; LE-P8-PRIV-NEXT:    addi r4, r1, 36
+; LE-P8-PRIV-NEXT:    mr r3, r4
 ; LE-P8-PRIV-NEXT:    bl callee2
 ; LE-P8-PRIV-NEXT:    nop
 ; LE-P8-PRIV-NEXT:    lwz r4, 16(r30)
@@ -3302,11 +3314,12 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P10-PRIV-NEXT:    std r0, 16(r1)
 ; BE-P10-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P10-PRIV-NEXT:    stdu r1, -144(r1)
+; BE-P10-PRIV-NEXT:    lwz r4, 12(r3)
 ; BE-P10-PRIV-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    mr r30, r3
-; BE-P10-PRIV-NEXT:    lwz r3, 12(r3)
-; BE-P10-PRIV-NEXT:    stw r3, 116(r1)
-; BE-P10-PRIV-NEXT:    addi r3, r1, 116
+; BE-P10-PRIV-NEXT:    stw r4, 116(r1)
+; BE-P10-PRIV-NEXT:    addi r4, r1, 116
+; BE-P10-PRIV-NEXT:    mr r3, r4
 ; BE-P10-PRIV-NEXT:    bl callee2
 ; BE-P10-PRIV-NEXT:    nop
 ; BE-P10-PRIV-NEXT:    lwz r4, 16(r30)
@@ -3333,9 +3346,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P9-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P9-PRIV-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    mr r30, r3
-; BE-P9-PRIV-NEXT:    lwz r3, 12(r3)
-; BE-P9-PRIV-NEXT:    stw r3, 116(r1)
-; BE-P9-PRIV-NEXT:    addi r3, r1, 116
+; BE-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-P9-PRIV-NEXT:    stw r4, 116(r1)
+; BE-P9-PRIV-NEXT:    addi r4, r1, 116
+; BE-P9-PRIV-NEXT:    mr r3, r4
 ; BE-P9-PRIV-NEXT:    bl callee2
 ; BE-P9-PRIV-NEXT:    nop
 ; BE-P9-PRIV-NEXT:    lwz r4, 16(r30)
@@ -3362,9 +3376,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P8-PRIV-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    mr r30, r3
-; BE-P8-PRIV-NEXT:    lwz r3, 12(r3)
-; BE-P8-PRIV-NEXT:    stw r3, 116(r1)
-; BE-P8-PRIV-NEXT:    addi r3, r1, 116
+; BE-P8-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-P8-PRIV-NEXT:    stw r4, 116(r1)
+; BE-P8-PRIV-NEXT:    addi r4, r1, 116
+; BE-P8-PRIV-NEXT:    mr r3, r4
 ; BE-P8-PRIV-NEXT:    bl callee2
 ; BE-P8-PRIV-NEXT:    nop
 ; BE-P8-PRIV-NEXT:    lwz r4, 16(r30)

diff  --git a/llvm/test/CodeGen/PowerPC/subreg-postra.ll b/llvm/test/CodeGen/PowerPC/subreg-postra.ll
index e13906940cf39eb..32a1b85cac8f565 100644
--- a/llvm/test/CodeGen/PowerPC/subreg-postra.ll
+++ b/llvm/test/CodeGen/PowerPC/subreg-postra.ll
@@ -151,7 +151,7 @@ wait_on_buffer.exit1319:                          ; preds = %while.body392
 ; CHECK: stdcx.
 ; CHECK: iselgt {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]]
-; CHECK-NO-ISEL: ori 29, 3, 0
+; CHECK-NO-ISEL: ori 5, 3, 0
 ; CHECK-NO-ISEL: b [[SUCCESSOR:.LBB[0-9]+]]
 
 

diff  --git a/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll b/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll
index 45ea5f2be02be97..38a8cf5ba781532 100644
--- a/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll
+++ b/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll
@@ -165,8 +165,7 @@ define void @tail_dup_no_succ(i32 %tag) {
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stdu 1, -48(1)
-; CHECK-NEXT:    mr 30, 3
-; CHECK-NEXT:    andi. 3, 3, 1
+; CHECK-NEXT:    andi. 4, 3, 1
 ; CHECK-NEXT:    std 0, 64(1)
 ; CHECK-NEXT:    bc 12, 1, .LBB2_3
 ; CHECK-NEXT:  .LBB2_1: # %v
@@ -181,18 +180,20 @@ define void @tail_dup_no_succ(i32 %tag) {
 ; CHECK-NEXT:    mtlr 0
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB2_3: # %bb
-; CHECK-NEXT:    andi. 3, 30, 2
+; CHECK-NEXT:    andi. 4, 3, 2
 ; CHECK-NEXT:    bne 0, .LBB2_5
 ; CHECK-NEXT:  # %bb.4: # %succ
-; CHECK-NEXT:    andi. 3, 30, 4
+; CHECK-NEXT:    andi. 3, 3, 4
 ; CHECK-NEXT:    beq 0, .LBB2_2
 ; CHECK-NEXT:    b .LBB2_1
 ; CHECK-NEXT:  .LBB2_5: # %c
+; CHECK-NEXT:    mr 30, 3
 ; CHECK-NEXT:    bl c
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bl c
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    andi. 3, 30, 4
+; CHECK-NEXT:    mr 3, 30
+; CHECK-NEXT:    andi. 3, 3, 4
 ; CHECK-NEXT:    beq 0, .LBB2_2
 ; CHECK-NEXT:    b .LBB2_1
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/tls-cse.ll b/llvm/test/CodeGen/PowerPC/tls-cse.ll
index de4c1768e128925..0e821bb3ac3124a 100644
--- a/llvm/test/CodeGen/PowerPC/tls-cse.ll
+++ b/llvm/test/CodeGen/PowerPC/tls-cse.ll
@@ -44,6 +44,6 @@ _ZN4llvm21PrettyStackTraceEntryD2Ev.exit:         ; preds = %entry
 ; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead at got@tlsld at ha
 ; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead at got@tlsld at l
 ; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead at tlsld)
-; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead at dtprel@ha
-; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead at dtprel@l(3)
-; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead at dtprel@l(3)
+; CHECK: addis [[REG2:[0-9]+]], 3, _ZL20PrettyStackTraceHead at dtprel@ha
+; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead at dtprel@l([[REG2]])
+; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead at dtprel@l([[REG2]])

diff  --git a/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll b/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll
index fcfb7d4ec877628..2fec986f0ba5688 100644
--- a/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll
+++ b/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll
@@ -100,24 +100,20 @@ define double @func(double %d, i32 %n) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: func:
 ; RV64IZFINXZDINX:       # %bb.0: # %entry
-; RV64IZFINXZDINX-NEXT:    addi sp, sp, -16
-; RV64IZFINXZDINX-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINXZDINX-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64IZFINXZDINX-NEXT:    sext.w a2, a1
-; RV64IZFINXZDINX-NEXT:    mv s0, a0
 ; RV64IZFINXZDINX-NEXT:    beqz a2, .LBB0_2
 ; RV64IZFINXZDINX-NEXT:  # %bb.1: # %if.else
+; RV64IZFINXZDINX-NEXT:    addi sp, sp, -16
+; RV64IZFINXZDINX-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFINXZDINX-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64IZFINXZDINX-NEXT:    addiw a1, a1, -1
-; RV64IZFINXZDINX-NEXT:    mv a0, s0
+; RV64IZFINXZDINX-NEXT:    mv s0, a0
 ; RV64IZFINXZDINX-NEXT:    call func at plt
 ; RV64IZFINXZDINX-NEXT:    fadd.d a0, a0, s0
-; RV64IZFINXZDINX-NEXT:    j .LBB0_3
-; RV64IZFINXZDINX-NEXT:  .LBB0_2: # %return
-; RV64IZFINXZDINX-NEXT:    mv a0, s0
-; RV64IZFINXZDINX-NEXT:  .LBB0_3: # %return
 ; RV64IZFINXZDINX-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64IZFINXZDINX-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64IZFINXZDINX-NEXT:    addi sp, sp, 16
+; RV64IZFINXZDINX-NEXT:  .LBB0_2: # %return
 ; RV64IZFINXZDINX-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %n, 0

diff  --git a/llvm/test/CodeGen/RISCV/fmax-fmin.ll b/llvm/test/CodeGen/RISCV/fmax-fmin.ll
index 8a1ab85a1b3366a..aac5e5efc2cf533 100644
--- a/llvm/test/CodeGen/RISCV/fmax-fmin.ll
+++ b/llvm/test/CodeGen/RISCV/fmax-fmin.ll
@@ -99,18 +99,17 @@ define double @maxnum_f64_nnan(double %x, double %y) nounwind {
 ; R32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; R32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; R32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; R32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; R32-NEXT:    mv s1, a3
 ; R32-NEXT:    mv s2, a2
 ; R32-NEXT:    mv s0, a1
-; R32-NEXT:    mv s4, a0
+; R32-NEXT:    mv s3, a0
 ; R32-NEXT:    call __gtdf2 at plt
-; R32-NEXT:    mv s3, s4
-; R32-NEXT:    bgtz a0, .LBB3_2
+; R32-NEXT:    mv a1, a0
+; R32-NEXT:    mv a0, s3
+; R32-NEXT:    bgtz a1, .LBB3_2
 ; R32-NEXT:  # %bb.1:
 ; R32-NEXT:    mv s3, s2
 ; R32-NEXT:  .LBB3_2:
-; R32-NEXT:    mv a0, s4
 ; R32-NEXT:    mv a1, s0
 ; R32-NEXT:    mv a2, s2
 ; R32-NEXT:    mv a3, s1
@@ -126,7 +125,6 @@ define double @maxnum_f64_nnan(double %x, double %y) nounwind {
 ; R32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; R32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; R32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; R32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; R32-NEXT:    addi sp, sp, 32
 ; R32-NEXT:    ret
 ;
@@ -250,18 +248,17 @@ define double @minnum_f64_fast(double %x, double %y) nounwind {
 ; R32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; R32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; R32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; R32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; R32-NEXT:    mv s1, a3
 ; R32-NEXT:    mv s2, a2
 ; R32-NEXT:    mv s0, a1
-; R32-NEXT:    mv s4, a0
+; R32-NEXT:    mv s3, a0
 ; R32-NEXT:    call __ltdf2 at plt
-; R32-NEXT:    mv s3, s4
-; R32-NEXT:    bltz a0, .LBB7_2
+; R32-NEXT:    mv a1, a0
+; R32-NEXT:    mv a0, s3
+; R32-NEXT:    bltz a1, .LBB7_2
 ; R32-NEXT:  # %bb.1:
 ; R32-NEXT:    mv s3, s2
 ; R32-NEXT:  .LBB7_2:
-; R32-NEXT:    mv a0, s4
 ; R32-NEXT:    mv a1, s0
 ; R32-NEXT:    mv a2, s2
 ; R32-NEXT:    mv a3, s1
@@ -277,7 +274,6 @@ define double @minnum_f64_fast(double %x, double %y) nounwind {
 ; R32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; R32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; R32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; R32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; R32-NEXT:    addi sp, sp, 32
 ; R32-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index dcaca2da146dba8..ffbecc5074d3a6c 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -1002,17 +1002,17 @@ declare void @call(i1)
 define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
 ; RV32-LABEL: usubo_ult_sub_dominates_i64:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    andi a7, a5, 1
-; RV32-NEXT:    beqz a7, .LBB31_5
+; RV32-NEXT:    andi a6, a5, 1
+; RV32-NEXT:    beqz a6, .LBB31_5
 ; RV32-NEXT:  # %bb.1: # %t
-; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    mv a7, a0
 ; RV32-NEXT:    sltu a0, a0, a2
 ; RV32-NEXT:    sub t0, a1, a3
 ; RV32-NEXT:    sub t0, t0, a0
-; RV32-NEXT:    sub a2, a6, a2
+; RV32-NEXT:    sub a2, a7, a2
 ; RV32-NEXT:    sw a2, 0(a4)
 ; RV32-NEXT:    sw t0, 4(a4)
-; RV32-NEXT:    beqz a7, .LBB31_5
+; RV32-NEXT:    beqz a6, .LBB31_5
 ; RV32-NEXT:  # %bb.2: # %end
 ; RV32-NEXT:    beq a1, a3, .LBB31_4
 ; RV32-NEXT:  # %bb.3: # %end

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index eb3953f9e366958..96100d2b62e41b2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -414,7 +414,7 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x
 ; CHECK-LABEL: strided_vpload_v32f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a4, 16
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    mv a3, a2
 ; CHECK-NEXT:    bltu a2, a4, .LBB33_2
 ; CHECK-NEXT:  # %bb.1:
@@ -427,11 +427,12 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a5
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v0, v8, 2
+; CHECK-NEXT:    vslidedown.vi v8, v9, 2
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vlse64.v v16, (a4), a1, v0.t
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
 ; CHECK-NEXT:    ret
   %load = call <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr %ptr, i32 %stride, <32 x i1> %m, i32 %evl)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 718e68505a93a44..c023c6a86f8b682 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -765,7 +765,7 @@ declare <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0.i32(
 define <vscale x 16 x double> @strided_load_nxv16f64(ptr %ptr, i64 %stride, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-RV32-LABEL: strided_load_nxv16f64:
 ; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    vmv1r.v v8, v0
+; CHECK-RV32-NEXT:    vmv1r.v v9, v0
 ; CHECK-RV32-NEXT:    csrr a4, vlenb
 ; CHECK-RV32-NEXT:    sub a2, a3, a4
 ; CHECK-RV32-NEXT:    sltu a5, a3, a2
@@ -779,17 +779,18 @@ define <vscale x 16 x double> @strided_load_nxv16f64(ptr %ptr, i64 %stride, <vsc
 ; CHECK-RV32-NEXT:    add a5, a0, a5
 ; CHECK-RV32-NEXT:    srli a4, a4, 3
 ; CHECK-RV32-NEXT:    vsetvli a6, zero, e8, mf4, ta, ma
-; CHECK-RV32-NEXT:    vslidedown.vx v0, v8, a4
+; CHECK-RV32-NEXT:    vslidedown.vx v8, v9, a4
 ; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vmv1r.v v0, v8
 ; CHECK-RV32-NEXT:    vlse64.v v16, (a5), a1, v0.t
 ; CHECK-RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV32-NEXT:    vmv1r.v v0, v8
+; CHECK-RV32-NEXT:    vmv1r.v v0, v9
 ; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1, v0.t
 ; CHECK-RV32-NEXT:    ret
 ;
 ; CHECK-RV64-LABEL: strided_load_nxv16f64:
 ; CHECK-RV64:       # %bb.0:
-; CHECK-RV64-NEXT:    vmv1r.v v8, v0
+; CHECK-RV64-NEXT:    vmv1r.v v9, v0
 ; CHECK-RV64-NEXT:    csrr a4, vlenb
 ; CHECK-RV64-NEXT:    sub a3, a2, a4
 ; CHECK-RV64-NEXT:    sltu a5, a2, a3
@@ -803,11 +804,12 @@ define <vscale x 16 x double> @strided_load_nxv16f64(ptr %ptr, i64 %stride, <vsc
 ; CHECK-RV64-NEXT:    add a5, a0, a5
 ; CHECK-RV64-NEXT:    srli a4, a4, 3
 ; CHECK-RV64-NEXT:    vsetvli a6, zero, e8, mf4, ta, ma
-; CHECK-RV64-NEXT:    vslidedown.vx v0, v8, a4
+; CHECK-RV64-NEXT:    vslidedown.vx v8, v9, a4
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV64-NEXT:    vmv1r.v v0, v8
 ; CHECK-RV64-NEXT:    vlse64.v v16, (a5), a1, v0.t
 ; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-RV64-NEXT:    vmv1r.v v0, v8
+; CHECK-RV64-NEXT:    vmv1r.v v0, v9
 ; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1, v0.t
 ; CHECK-RV64-NEXT:    ret
   %v = call <vscale x 16 x double> @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr %ptr, i64 %stride, <vscale x 16 x i1> %mask, i32 %evl)

diff  --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
index 5e55c9aa15f5e04..f2cdd4a94b91803 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
@@ -33,7 +33,8 @@ entry:
 
 if.end:                                           ; preds = %entry
   %mul = mul nsw i32 %a, 11
-  %sub = sub nsw i32 %mul, %b
+  %xor = xor i32 %mul, 100
+  %sub = sub nsw i32 %xor, %b
   %call = tail call i32 @h(i32 %sub, i32 %b)
   %add = add nsw i32 %call, %b
   %mul1 = mul nsw i32 %add, %call
@@ -81,7 +82,8 @@ entry:
 
 if.end:                                           ; preds = %entry
   %mul = mul nsw i32 %a, 11
-  %sub = sub nsw i32 %mul, %b
+  %xor = xor i32 %mul, 100
+  %sub = sub nsw i32 %xor, %b
   %call = tail call i32 @h(i32 %sub, i32 %b)
   %add = add nsw i32 %call, %b
   %mul1 = mul nsw i32 %add, %call

diff  --git a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 79b47f638a26975..68566c7b3709791 100644
--- a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -12,10 +12,10 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    movl %edx, %esi
+; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    movb $1, %cl
 ; CHECK-NEXT:    movl $1, %ebx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_1: # %bb.i5
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -25,11 +25,11 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    testb $1, %al
 ; CHECK-NEXT:    jne .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %mp_unexp_mp2d.exit.i
-; CHECK-NEXT:    je .LBB0_10
-; CHECK-NEXT:  # %bb.3: # %cond_next.i
+; CHECK-NEXT:    je .LBB0_3
+; CHECK-NEXT:  # %bb.5: # %cond_next.i
 ; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    jne .LBB0_10
-; CHECK-NEXT:  # %bb.4: # %cond_next36.i
+; CHECK-NEXT:    jne .LBB0_3
+; CHECK-NEXT:  # %bb.6: # %cond_next36.i
 ; CHECK-NEXT:    movl $0, 0
 ; CHECK-NEXT:    movzbl %al, %ebp
 ; CHECK-NEXT:    andl $1, %ebp
@@ -38,25 +38,26 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    xorpd %xmm1, %xmm1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_5: # %bb.i28.i
+; CHECK-NEXT:  .LBB0_7: # %bb.i28.i
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    cvttsd2si %xmm1, %edx
-; CHECK-NEXT:    cmpl %esi, %edx
-; CHECK-NEXT:    cmovgel %eax, %edx
+; CHECK-NEXT:    cvttsd2si %xmm1, %edi
+; CHECK-NEXT:    cmpl %edx, %edi
+; CHECK-NEXT:    cmovgel %eax, %edi
 ; CHECK-NEXT:    addl $2, %ecx
 ; CHECK-NEXT:    xorps %xmm2, %xmm2
-; CHECK-NEXT:    cvtsi2sd %edx, %xmm2
+; CHECK-NEXT:    cvtsi2sd %edi, %xmm2
 ; CHECK-NEXT:    xorpd %xmm1, %xmm1
 ; CHECK-NEXT:    subsd %xmm2, %xmm1
 ; CHECK-NEXT:    mulsd %xmm0, %xmm1
 ; CHECK-NEXT:    addl $-2, %ebp
-; CHECK-NEXT:    jne .LBB0_5
-; CHECK-NEXT:  # %bb.6: # %mp_unexp_d2mp.exit29.i
+; CHECK-NEXT:    jne .LBB0_7
+; CHECK-NEXT:  # %bb.8: # %mp_unexp_d2mp.exit29.i
 ; CHECK-NEXT:    movl $0, 0
-; CHECK-NEXT:    je .LBB0_7
-; CHECK-NEXT:  # %bb.8: # %mp_sqrt_init.exit
+; CHECK-NEXT:    je .LBB0_9
+; CHECK-NEXT:  # %bb.10: # %mp_sqrt_init.exit
 ; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    movl %edi, %edx
+; CHECK-NEXT:    movl %edx, %edi
+; CHECK-NEXT:    movl %esi, %edx
 ; CHECK-NEXT:    calll mp_mul_csqu at PLT
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    movl $-1, %edx
@@ -66,17 +67,31 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    calll rdft at PLT
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; CHECK-NEXT:    movl %edi, %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    pushl $0
 ; CHECK-NEXT:    calll mp_mul_d2i at PLT
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    testl %ebp, %ebp
-; CHECK-NEXT:    jne .LBB0_10
-; CHECK-NEXT:  # %bb.9: # %cond_false.i
+; CHECK-NEXT:    je .LBB0_11
+; CHECK-NEXT:  .LBB0_3: # %cond_true.i
+; CHECK-NEXT:    addl $4, %esp
+; CHECK-NEXT:  .LBB0_4: # %cond_true.i
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    popl %ebx
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_9: # %bb.i.i
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    jmp .LBB0_9
+; CHECK-NEXT:  .LBB0_11: # %cond_false.i
 ; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    movl (%esp), %esi # 4-byte Reload
 ; CHECK-NEXT:    movl %esi, %edx
 ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    pushl $0
@@ -84,43 +99,33 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    addl $8, %esp
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    calll mp_add at PLT
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:    pushl %ebp
-; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    calll mp_sub at PLT
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    pushl $0
 ; CHECK-NEXT:    calll mp_round at PLT
 ; CHECK-NEXT:    addl $8, %esp
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    calll mp_mul_d2i at PLT
-; CHECK-NEXT:    addl $12, %esp
-; CHECK-NEXT:  .LBB0_10: # %cond_true.i
-; CHECK-NEXT:    popl %esi
-; CHECK-NEXT:    popl %edi
-; CHECK-NEXT:    popl %ebx
-; CHECK-NEXT:    popl %ebp
-; CHECK-NEXT:    retl
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_7: # %bb.i.i
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    jmp .LBB0_7
+; CHECK-NEXT:    addl $16, %esp
+; CHECK-NEXT:    jmp .LBB0_4
 entry:
 	br label %bb.i5
 

diff  --git a/llvm/test/CodeGen/X86/cgp-usubo.ll b/llvm/test/CodeGen/X86/cgp-usubo.ll
index 697df09ea28f37a..57e2a2b22bc9bcf 100644
--- a/llvm/test/CodeGen/X86/cgp-usubo.ll
+++ b/llvm/test/CodeGen/X86/cgp-usubo.ll
@@ -172,11 +172,11 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) nounwin
 ; CHECK-NEXT:    je .LBB9_2
 ; CHECK-NEXT:  # %bb.1: # %t
 ; CHECK-NEXT:    movq %rdx, %rbx
-; CHECK-NEXT:    movq %rsi, %r15
 ; CHECK-NEXT:    movq %rdi, %r14
 ; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:    cmpq %rsi, %r14
 ; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    movq %rsi, %r15
 ; CHECK-NEXT:    callq call at PLT
 ; CHECK-NEXT:    subq %r15, %r14
 ; CHECK-NEXT:    jae .LBB9_2

diff  --git a/llvm/test/CodeGen/X86/csr-split.ll b/llvm/test/CodeGen/X86/csr-split.ll
index 460e705d1847a37..f90d14347bc5bb0 100644
--- a/llvm/test/CodeGen/X86/csr-split.ll
+++ b/llvm/test/CodeGen/X86/csr-split.ll
@@ -66,23 +66,20 @@ declare signext i32 @callNonVoid(ptr) local_unnamed_addr
 define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr  {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    testq %rdi, %rdi
 ; CHECK-NEXT:    je .LBB1_2
 ; CHECK-NEXT:  # %bb.1: # %if.end
-; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    movslq a(%rip), %rax
 ; CHECK-NEXT:    cmpq %rdi, %rax
 ; CHECK-NEXT:    je .LBB1_3
 ; CHECK-NEXT:  .LBB1_2: # %return
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB1_3: # %if.then2
+; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    callq callVoid at PLT
 ; CHECK-NEXT:    movq %rbx, %rdi
 ; CHECK-NEXT:    popq %rbx

diff  --git a/llvm/test/CodeGen/X86/ragreedy-bug.ll b/llvm/test/CodeGen/X86/ragreedy-bug.ll
index 4e2e7fb2f030dcb..0d9332ef9cdd106 100644
--- a/llvm/test/CodeGen/X86/ragreedy-bug.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-bug.ll
@@ -22,13 +22,13 @@
 ; CHECK: cond.false.i.i
 ; CHECK: maskrune
 ; CHECK-NEXT: movzbl
-; CHECK-NEXT: movzbl
+; CHECK-NEXT: movq
 ; CHECK-NEXT: testl
 ; CHECK-NEXT: je
 ; CHECK: cond.false.i.i219
 ; CHECK: maskrune
 ; CHECK-NEXT: movzbl
-; CHECK-NEXT: movzbl
+; CHECK-NEXT: movq
 ; CHECK-NEXT: testl
 ; CHECK-NEXT: jne
 

diff  --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index 7217245cc8ac30b..e9448a800fd9597 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -65,18 +65,20 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je LBB0_54
 ; CHECK-NEXT:  ## %bb.6: ## %SyTime.exit2720
-; CHECK-NEXT:    movq %rdx, %r14
-; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    cmpq %rax, %rcx
 ; CHECK-NEXT:    jae LBB0_8
 ; CHECK-NEXT:  ## %bb.7: ## %for.body.lr.ph
+; CHECK-NEXT:    movq %rdx, %rbx
 ; CHECK-NEXT:    movl $512, %edx ## imm = 0x200
 ; CHECK-NEXT:    movl $32, %esi
+; CHECK-NEXT:    movq %rdi, %r14
 ; CHECK-NEXT:    callq _memset
+; CHECK-NEXT:    movq %r14, %rdi
+; CHECK-NEXT:    movq %rbx, %rdx
 ; CHECK-NEXT:  LBB0_8: ## %while.body.preheader
-; CHECK-NEXT:    imulq $1040, %r14, %rax ## imm = 0x410
+; CHECK-NEXT:    imulq $1040, %rdx, %rax ## imm = 0x410
 ; CHECK-NEXT:    movq _syBuf at GOTPCREL(%rip), %rcx
 ; CHECK-NEXT:    leaq 8(%rcx,%rax), %rdx
 ; CHECK-NEXT:    movl $1, %r13d
@@ -90,7 +92,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jne LBB0_9
 ; CHECK-NEXT:  ## %bb.10: ## %do.end
 ; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    xorl %ebp, %ebp
 ; CHECK-NEXT:    testb %bpl, %bpl
 ; CHECK-NEXT:    jne LBB0_11

diff  --git a/llvm/test/CodeGen/X86/speculative-load-hardening.ll b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
index 2352bc768ae8284..0c47fcddc43af2a 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
@@ -42,16 +42,16 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2,
 ; X64-NEXT:    .cfi_offset %r14, -24
 ; X64-NEXT:    .cfi_offset %r15, -16
 ; X64-NEXT:    movq %rsp, %rax
-; X64-NEXT:    movq $-1, %r14
+; X64-NEXT:    movq $-1, %rbx
 ; X64-NEXT:    sarq $63, %rax
 ; X64-NEXT:    testl %edi, %edi
 ; X64-NEXT:    jne .LBB1_1
 ; X64-NEXT:  # %bb.2: # %then1
-; X64-NEXT:    cmovneq %r14, %rax
+; X64-NEXT:    cmovneq %rbx, %rax
 ; X64-NEXT:    testl %esi, %esi
 ; X64-NEXT:    je .LBB1_4
 ; X64-NEXT:  .LBB1_1:
-; X64-NEXT:    cmoveq %r14, %rax
+; X64-NEXT:    cmoveq %rbx, %rax
 ; X64-NEXT:  .LBB1_8: # %exit
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    orq %rax, %rsp
@@ -64,42 +64,43 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2,
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB1_4: # %then2
 ; X64-NEXT:    .cfi_def_cfa_offset 32
-; X64-NEXT:    movq %r8, %rbx
-; X64-NEXT:    cmovneq %r14, %rax
+; X64-NEXT:    cmovneq %rbx, %rax
 ; X64-NEXT:    testl %edx, %edx
 ; X64-NEXT:    je .LBB1_6
 ; X64-NEXT:  # %bb.5: # %else3
-; X64-NEXT:    cmoveq %r14, %rax
+; X64-NEXT:    cmoveq %rbx, %rax
 ; X64-NEXT:    movslq (%r9), %rcx
 ; X64-NEXT:    orq %rax, %rcx
-; X64-NEXT:    leaq (%rbx,%rcx,4), %r15
-; X64-NEXT:    movl %ecx, (%rbx,%rcx,4)
+; X64-NEXT:    leaq (%r8,%rcx,4), %r14
+; X64-NEXT:    movl %ecx, (%r8,%rcx,4)
 ; X64-NEXT:    jmp .LBB1_7
 ; X64-NEXT:  .LBB1_6: # %then3
-; X64-NEXT:    cmovneq %r14, %rax
+; X64-NEXT:    cmovneq %rbx, %rax
 ; X64-NEXT:    movl (%rcx), %ecx
-; X64-NEXT:    addl (%rbx), %ecx
+; X64-NEXT:    addl (%r8), %ecx
 ; X64-NEXT:    movslq %ecx, %rdi
 ; X64-NEXT:    orq %rax, %rdi
-; X64-NEXT:    movl (%rbx,%rdi,4), %esi
+; X64-NEXT:    movl (%r8,%rdi,4), %esi
 ; X64-NEXT:    orl %eax, %esi
-; X64-NEXT:    movq (%r9), %r15
-; X64-NEXT:    orq %rax, %r15
-; X64-NEXT:    addl (%r15), %esi
+; X64-NEXT:    movq (%r9), %r14
+; X64-NEXT:    orq %rax, %r14
+; X64-NEXT:    addl (%r14), %esi
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    # kill: def $edi killed $edi killed $rdi
 ; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    movq %r8, %r15
 ; X64-NEXT:    callq leak at PLT
 ; X64-NEXT:  .Lslh_ret_addr0:
+; X64-NEXT:    movq %r15, %r8
 ; X64-NEXT:    movq %rsp, %rax
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    sarq $63, %rax
 ; X64-NEXT:    cmpq $.Lslh_ret_addr0, %rcx
-; X64-NEXT:    cmovneq %r14, %rax
+; X64-NEXT:    cmovneq %rbx, %rax
 ; X64-NEXT:  .LBB1_7: # %merge
-; X64-NEXT:    movslq (%r15), %rcx
+; X64-NEXT:    movslq (%r14), %rcx
 ; X64-NEXT:    orq %rax, %rcx
-; X64-NEXT:    movl $0, (%rbx,%rcx,4)
+; X64-NEXT:    movl $0, (%r8,%rcx,4)
 ; X64-NEXT:    jmp .LBB1_8
 ;
 ; X64-LFENCE-LABEL: test_basic_conditions:
@@ -119,29 +120,30 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2,
 ; X64-LFENCE-NEXT:    testl %esi, %esi
 ; X64-LFENCE-NEXT:    jne .LBB1_6
 ; X64-LFENCE-NEXT:  # %bb.2: # %then2
-; X64-LFENCE-NEXT:    movq %r8, %rbx
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    testl %edx, %edx
 ; X64-LFENCE-NEXT:    je .LBB1_3
 ; X64-LFENCE-NEXT:  # %bb.4: # %else3
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    movslq (%r9), %rax
-; X64-LFENCE-NEXT:    leaq (%rbx,%rax,4), %r14
-; X64-LFENCE-NEXT:    movl %eax, (%rbx,%rax,4)
+; X64-LFENCE-NEXT:    leaq (%r8,%rax,4), %rbx
+; X64-LFENCE-NEXT:    movl %eax, (%r8,%rax,4)
 ; X64-LFENCE-NEXT:    jmp .LBB1_5
 ; X64-LFENCE-NEXT:  .LBB1_3: # %then3
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    movl (%rcx), %eax
-; X64-LFENCE-NEXT:    addl (%rbx), %eax
+; X64-LFENCE-NEXT:    addl (%r8), %eax
 ; X64-LFENCE-NEXT:    movslq %eax, %rdi
-; X64-LFENCE-NEXT:    movl (%rbx,%rdi,4), %esi
-; X64-LFENCE-NEXT:    movq (%r9), %r14
-; X64-LFENCE-NEXT:    addl (%r14), %esi
+; X64-LFENCE-NEXT:    movl (%r8,%rdi,4), %esi
+; X64-LFENCE-NEXT:    movq (%r9), %rbx
+; X64-LFENCE-NEXT:    addl (%rbx), %esi
 ; X64-LFENCE-NEXT:    # kill: def $edi killed $edi killed $rdi
+; X64-LFENCE-NEXT:    movq %r8, %r14
 ; X64-LFENCE-NEXT:    callq leak at PLT
+; X64-LFENCE-NEXT:    movq %r14, %r8
 ; X64-LFENCE-NEXT:  .LBB1_5: # %merge
-; X64-LFENCE-NEXT:    movslq (%r14), %rax
-; X64-LFENCE-NEXT:    movl $0, (%rbx,%rax,4)
+; X64-LFENCE-NEXT:    movslq (%rbx), %rax
+; X64-LFENCE-NEXT:    movl $0, (%r8,%rax,4)
 ; X64-LFENCE-NEXT:  .LBB1_6: # %exit
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    addq $8, %rsp
@@ -501,6 +503,142 @@ declare ptr @__cxa_allocate_exception(i64) local_unnamed_addr
 declare void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr
 
 define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_hardening personality ptr @__gxx_personality_v0 {
+; X64-LABEL: test_basic_eh:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 40
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 48
+; X64-NEXT:    .cfi_offset %rbx, -40
+; X64-NEXT:    .cfi_offset %r14, -32
+; X64-NEXT:    .cfi_offset %r15, -24
+; X64-NEXT:    .cfi_offset %rbp, -16
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    movq $-1, %rbx
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    cmpl $41, %edi
+; X64-NEXT:    jg .LBB4_1
+; X64-NEXT:  # %bb.2: # %thrower
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    cmovgq %rbx, %rax
+; X64-NEXT:    movslq %edi, %rcx
+; X64-NEXT:    movq %rsi, %r15
+; X64-NEXT:    movl (%rsi,%rcx,4), %ebp
+; X64-NEXT:    orl %eax, %ebp
+; X64-NEXT:    movl $4, %edi
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq __cxa_allocate_exception at PLT
+; X64-NEXT:  .Lslh_ret_addr4:
+; X64-NEXT:    movq %rsp, %rcx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    sarq $63, %rcx
+; X64-NEXT:    cmpq $.Lslh_ret_addr4, %rdx
+; X64-NEXT:    cmovneq %rbx, %rcx
+; X64-NEXT:    movl %ebp, (%rax)
+; X64-NEXT:  .Ltmp0:
+; X64-NEXT:    shlq $47, %rcx
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    xorl %esi, %esi
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    orq %rcx, %rsp
+; X64-NEXT:    callq __cxa_throw at PLT
+; X64-NEXT:  .Lslh_ret_addr5:
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    cmpq $.Lslh_ret_addr5, %rcx
+; X64-NEXT:    cmovneq %rbx, %rax
+; X64-NEXT:  .Ltmp1:
+; X64-NEXT:    jmp .LBB4_3
+; X64-NEXT:  .LBB4_1:
+; X64-NEXT:    cmovleq %rbx, %rax
+; X64-NEXT:  .LBB4_3: # %exit
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    addq $8, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 40
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    popq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    popq %r15
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB4_4: # %lpad
+; X64-NEXT:    .cfi_def_cfa_offset 48
+; X64-NEXT:  .Ltmp2:
+; X64-NEXT:    movq %rsp, %rcx
+; X64-NEXT:    sarq $63, %rcx
+; X64-NEXT:    movl (%rax), %eax
+; X64-NEXT:    addl (%r15), %eax
+; X64-NEXT:    cltq
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    movl (%r14,%rax,4), %edi
+; X64-NEXT:    orl %ecx, %edi
+; X64-NEXT:    shlq $47, %rcx
+; X64-NEXT:    orq %rcx, %rsp
+; X64-NEXT:    callq sink at PLT
+; X64-NEXT:  .Lslh_ret_addr6:
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    cmpq $.Lslh_ret_addr6, %rcx
+; X64-NEXT:    cmovneq %rbx, %rax
+;
+; X64-LFENCE-LABEL: test_basic_eh:
+; X64-LFENCE:       # %bb.0: # %entry
+; X64-LFENCE-NEXT:    pushq %rbp
+; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
+; X64-LFENCE-NEXT:    pushq %r14
+; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
+; X64-LFENCE-NEXT:    pushq %rbx
+; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
+; X64-LFENCE-NEXT:    .cfi_offset %rbx, -32
+; X64-LFENCE-NEXT:    .cfi_offset %r14, -24
+; X64-LFENCE-NEXT:    .cfi_offset %rbp, -16
+; X64-LFENCE-NEXT:    cmpl $41, %edi
+; X64-LFENCE-NEXT:    jg .LBB4_2
+; X64-LFENCE-NEXT:  # %bb.1: # %thrower
+; X64-LFENCE-NEXT:    movq %rdx, %rbx
+; X64-LFENCE-NEXT:    movq %rsi, %r14
+; X64-LFENCE-NEXT:    lfence
+; X64-LFENCE-NEXT:    movslq %edi, %rax
+; X64-LFENCE-NEXT:    movl (%rsi,%rax,4), %ebp
+; X64-LFENCE-NEXT:    movl $4, %edi
+; X64-LFENCE-NEXT:    callq __cxa_allocate_exception at PLT
+; X64-LFENCE-NEXT:    movl %ebp, (%rax)
+; X64-LFENCE-NEXT:  .Ltmp0:
+; X64-LFENCE-NEXT:    movq %rax, %rdi
+; X64-LFENCE-NEXT:    xorl %esi, %esi
+; X64-LFENCE-NEXT:    xorl %edx, %edx
+; X64-LFENCE-NEXT:    callq __cxa_throw at PLT
+; X64-LFENCE-NEXT:  .Ltmp1:
+; X64-LFENCE-NEXT:  .LBB4_2: # %exit
+; X64-LFENCE-NEXT:    lfence
+; X64-LFENCE-NEXT:    popq %rbx
+; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
+; X64-LFENCE-NEXT:    popq %r14
+; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
+; X64-LFENCE-NEXT:    popq %rbp
+; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 8
+; X64-LFENCE-NEXT:    retq
+; X64-LFENCE-NEXT:  .LBB4_3: # %lpad
+; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
+; X64-LFENCE-NEXT:  .Ltmp2:
+; X64-LFENCE-NEXT:    movl (%rax), %eax
+; X64-LFENCE-NEXT:    addl (%r14), %eax
+; X64-LFENCE-NEXT:    cltq
+; X64-LFENCE-NEXT:    movl (%rbx,%rax,4), %edi
+; X64-LFENCE-NEXT:    callq sink at PLT
 entry:
   %a.cmp = icmp slt i32 %a, 42
   br i1 %a.cmp, label %thrower, label %exit

diff  --git a/llvm/test/CodeGen/X86/split-reg-with-hint.ll b/llvm/test/CodeGen/X86/split-reg-with-hint.ll
new file mode 100644
index 000000000000000..689f83ff0adc4c8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/split-reg-with-hint.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+
+; %ptr has a hint to %rdi in entry block, it also has a interference with %rdi
+; in block if.then. It should be split in cold block if.then.
+; Similarly %p2, %p3, %p4, %p5 and %p6 should also be split in cold block
+; if.then.
+
+define ptr @foo(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    je .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %if.end
+; CHECK-NEXT:    incq %rdi
+; CHECK-NEXT:    jmp qux at PLT # TAILCALL
+; CHECK-NEXT:  .LBB0_1: # %if.then
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset %rbx, -48
+; CHECK-NEXT:    .cfi_offset %r12, -40
+; CHECK-NEXT:    .cfi_offset %r13, -32
+; CHECK-NEXT:    .cfi_offset %r14, -24
+; CHECK-NEXT:    .cfi_offset %r15, -16
+; CHECK-NEXT:    movq %rsi, %rbx
+; CHECK-NEXT:    movq %rdx, %r14
+; CHECK-NEXT:    movq %rcx, %r15
+; CHECK-NEXT:    movq %r8, %r12
+; CHECK-NEXT:    movq %r9, %r13
+; CHECK-NEXT:    callq bar at PLT
+; CHECK-NEXT:    movq %rbx, %rsi
+; CHECK-NEXT:    movq %r14, %rdx
+; CHECK-NEXT:    movq %r15, %rcx
+; CHECK-NEXT:    movq %r12, %r8
+; CHECK-NEXT:    movq %r13, %r9
+; CHECK-NEXT:    movq %rax, %rdi
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_restore %rbx
+; CHECK-NEXT:    .cfi_restore %r12
+; CHECK-NEXT:    .cfi_restore %r13
+; CHECK-NEXT:    .cfi_restore %r14
+; CHECK-NEXT:    .cfi_restore %r15
+; CHECK-NEXT:    incq %rdi
+; CHECK-NEXT:    jmp qux at PLT # TAILCALL
+entry:
+  %tobool.not = icmp eq ptr %ptr, null
+  br i1 %tobool.not, label %if.then, label %if.end, !prof !5
+
+if.then:                                          ; preds = %entry
+  %call = tail call ptr @bar(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6)
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %ptr.addr.0 = phi ptr [ %call, %if.then ], [ %ptr, %entry ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %ptr.addr.0, i64 1
+  %call2 = tail call ptr @qux(ptr %incdec.ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6)
+  ret ptr %call2
+}
+
+!5 = !{!"branch_weights", i32 1, i32 2000}
+
+declare ptr @bar(ptr, i64, i64, i64, i64, i64)
+declare ptr @qux(ptr, i64, i64, i64, i64, i64)

diff  --git a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll
index 267c2596020e868..7535966523a6399 100644
--- a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll
+++ b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll
@@ -158,34 +158,29 @@ declare void @consume(ptr addrspace(1) %obj)
 define i1 @test_cross_bb(ptr addrspace(1) %a, i1 %external_cond) gc "statepoint-example" {
 ; CHECK-LABEL: test_cross_bb:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    subq $16, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset %rbx, -24
-; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movl %esi, %ebp
-; CHECK-NEXT:    movq %rdi, (%rsp)
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    callq return_i1 at PLT
 ; CHECK-NEXT:  .Ltmp8:
-; CHECK-NEXT:    testb $1, %bpl
+; CHECK-NEXT:    testb $1, %bl
 ; CHECK-NEXT:    je .LBB8_2
 ; CHECK-NEXT:  # %bb.1: # %left
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
 ; CHECK-NEXT:    movl %eax, %ebx
-; CHECK-NEXT:    movq (%rsp), %rdi
 ; CHECK-NEXT:    callq consume at PLT
 ; CHECK-NEXT:    movl %ebx, %eax
 ; CHECK-NEXT:    jmp .LBB8_3
 ; CHECK-NEXT:  .LBB8_2: # %right
 ; CHECK-NEXT:    movb $1, %al
 ; CHECK-NEXT:  .LBB8_3: # %right
-; CHECK-NEXT:    addq $8, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    addq $16, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:

diff  --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
index fe057a8d78eb1ae..4efddd57cf5b0b1 100644
--- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
+++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
@@ -296,7 +296,8 @@ body:             |
   ; CHECK-NEXT:   successors: {{$}}
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-  ; CHECK-NEXT:   dead [[COPY1]]:gr64, dead [[COPY]]:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @barney, undef $rdi, 2, 0, 2, 0, 2, 45, 2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 1, 2, 71, 2, 0, 2, 5, 2, 0, 2, 0, [[COPY1]], 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 2, 2, 5, 2, 1, 2, 0, 2, 2, 2, 0, 2, 0, [[COPY]], 2, 7, 2, 0, 2, 1, 2, 6, 2, 0, 2, 0, 2, 1, 2, 1, 2, 0, [[COPY]], 2, 8, 2, 10, 2, 2, [[COPY1]](tied-def 0), [[COPY]](tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   dead [[COPY2]]:gr64, dead [[COPY]]:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @barney, undef $rdi, 2, 0, 2, 0, 2, 45, 2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 1, 2, 71, 2, 0, 2, 5, 2, 0, 2, 0, [[COPY2]], 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 2, 2, 5, 2, 1, 2, 0, 2, 2, 2, 0, 2, 0, [[COPY]], 2, 7, 2, 0, 2, 1, 2, 6, 2, 0, 2, 0, 2, 1, 2, 1, 2, 0, [[COPY]], 2, 8, 2, 10, 2, 2, [[COPY2]](tied-def 0), [[COPY]](tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp
   ; CHECK-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.bb17:
@@ -348,9 +349,9 @@ body:             |
   ; CHECK-NEXT:   [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm undef %59:gr64, 1, $noreg, 0, $noreg :: (load unordered (s64) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   [[NOT64r2:%[0-9]+]]:gr64 = NOT64r [[NOT64r2]]
   ; CHECK-NEXT:   CMP64rr [[NOT64r2]], [[COPY6]], implicit-def $eflags
-  ; CHECK-NEXT:   undef %100.sub_32bit:gr64_with_sub_8bit = MOV32ri 0
-  ; CHECK-NEXT:   [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], %100, 4, implicit killed $eflags
-  ; CHECK-NEXT:   INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %100, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+  ; CHECK-NEXT:   undef [[MOV32ri2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32ri 0
+  ; CHECK-NEXT:   [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], [[MOV32ri2]], 4, implicit killed $eflags
+  ; CHECK-NEXT:   INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, [[MOV32ri2]], 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
   ; CHECK-NEXT:   LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
   ; CHECK-NEXT:   $rdi = COPY [[COPY4]]

diff  --git a/llvm/test/CodeGen/X86/statepoint-ra.ll b/llvm/test/CodeGen/X86/statepoint-ra.ll
index 9f10b7dd19322b7..4e57648820c4b30 100644
--- a/llvm/test/CodeGen/X86/statepoint-ra.ll
+++ b/llvm/test/CodeGen/X86/statepoint-ra.ll
@@ -65,53 +65,53 @@ declare token @llvm.experimental.gc.statepoint.p0(i64 , i32 , ptr, i32 , i32 , .
 ;CHECK:   bb.0.bb:
 ;CHECK:     successors: %bb.2(0x40000000), %bb.1(0x40000000)
 ;CHECK:     liveins: $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7
-;CHECK:     %49:fr64 = COPY $xmm7
+;CHECK:     %55:fr64 = COPY $xmm7
 ;CHECK:     %10:fr64 = COPY $xmm6
-;CHECK:     %41:fr64 = COPY $xmm5
-;CHECK:     %45:fr64 = COPY $xmm4
-;CHECK:     %53:fr64 = COPY $xmm3
+;CHECK:     %45:fr64 = COPY $xmm5
+;CHECK:     %52:fr64 = COPY $xmm4
+;CHECK:     %59:fr64 = COPY $xmm3
 ;CHECK:     %6:fr64 = COPY $xmm2
-;CHECK:     %58:fr64 = COPY $xmm1
-;CHECK:     %62:fr64 = COPY $xmm0
+;CHECK:     %64:fr64 = COPY $xmm1
+;CHECK:     %68:fr64 = COPY $xmm0
 ;CHECK:     %3:gr64 = COPY $rdi
-;CHECK:     %76:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0)
+;CHECK:     %82:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0)
 ;CHECK:     %14:fr64 = MOVSDrm_alt %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16)
-;CHECK:     %66:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2)
-;CHECK:     %71:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16)
+;CHECK:     %72:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2)
+;CHECK:     %77:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16)
 ;CHECK:     MOV64mr %stack.0, 1, $noreg, 0, $noreg, %3 :: (store (s64) into %stack.0)
 ;CHECK:     ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-;CHECK:     STATEPOINT 2882400000, 0, 0, target-flags(x86-plt) @blam, 2, 9, 2, 0, 2, 59, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 26, 2, 0, 2, 0, 1, 8, %stack.0, 0, 2, 4, %62, 2, 7, 2, 0, 2, 4, %58, 2, 7, 2, 0, 2, 4, %6, 2, 7, 2, 0, 2, 4, %53, 2, 7, 2, 0, 2, 4, %45, 2, 7, 2, 0, 2, 4, %41, 2, 7, 2, 0, 2, 4, %10, 2, 7, 2, 0, 2, 4, %49, 2, 7, 2, 0, 2, 4, %71, 2, 7, 2, 0, 2, 4, %66, 2, 7, 2, 0, 2, 4, %14, 2, 7, 2, 0, 2, 4, %76, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64_mostregs, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0)
+;CHECK:     STATEPOINT 2882400000, 0, 0, target-flags(x86-plt) @blam, 2, 9, 2, 0, 2, 59, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 26, 2, 0, 2, 0, 1, 8, %stack.0, 0, 2, 4, %68, 2, 7, 2, 0, 2, 4, %64, 2, 7, 2, 0, 2, 4, %6, 2, 7, 2, 0, 2, 4, %59, 2, 7, 2, 0, 2, 4, %52, 2, 7, 2, 0, 2, 4, %45, 2, 7, 2, 0, 2, 4, %10, 2, 7, 2, 0, 2, 4, %55, 2, 7, 2, 0, 2, 4, %77, 2, 7, 2, 0, 2, 4, %72, 2, 7, 2, 0, 2, 4, %14, 2, 7, 2, 0, 2, 4, %82, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64_mostregs, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0)
 ;CHECK:     ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
 ;CHECK:     %17:gr32 = MOV32r0 implicit-def dead $eflags
 ;CHECK:     TEST8rr %17.sub_8bit, %17.sub_8bit, implicit-def $eflags
-;CHECK:     MOVSDmr %stack.1, 1, $noreg, 0, $noreg, %41 :: (store (s64) into %stack.1)
-;CHECK:     MOVSDmr %stack.2, 1, $noreg, 0, $noreg, %45 :: (store (s64) into %stack.2)
-;CHECK:     MOVSDmr %stack.5, 1, $noreg, 0, $noreg, %58 :: (store (s64) into %stack.5)
-;CHECK:     MOVSDmr %stack.6, 1, $noreg, 0, $noreg, %62 :: (store (s64) into %stack.6)
+;CHECK:     MOVSDmr %stack.1, 1, $noreg, 0, $noreg, %45 :: (store (s64) into %stack.1)
+;CHECK:     MOVSDmr %stack.2, 1, $noreg, 0, $noreg, %52 :: (store (s64) into %stack.2)
+;CHECK:     MOVSDmr %stack.5, 1, $noreg, 0, $noreg, %64 :: (store (s64) into %stack.5)
+;CHECK:     MOVSDmr %stack.6, 1, $noreg, 0, $noreg, %68 :: (store (s64) into %stack.6)
 ;CHECK:     JCC_1 %bb.2, 4, implicit killed $eflags
 ;CHECK:   bb.1:
 ;CHECK:     successors: %bb.3(0x80000000)
-;CHECK:     %54:fr64 = MOVSDrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s64) from constant-pool)
-;CHECK:     MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %54 :: (store (s64) into %stack.3)
-;CHECK:     MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %54 :: (store (s64) into %stack.4)
-;CHECK:     MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %54 :: (store (s64) into %stack.7)
+;CHECK:     %60:fr64 = MOVSDrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s64) from constant-pool)
+;CHECK:     MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %60 :: (store (s64) into %stack.3)
+;CHECK:     MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %60 :: (store (s64) into %stack.4)
+;CHECK:     MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %60 :: (store (s64) into %stack.7)
 ;CHECK:     JMP_1 %bb.3
 ;CHECK:   bb.2.bb13:
 ;CHECK:     successors: %bb.3(0x80000000)
 ;CHECK:     ADJCALLSTACKDOWN64 8, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
 ;CHECK:     MOVSDmr $rsp, 1, $noreg, 0, $noreg, %14 :: (store (s64) into stack)
 ;CHECK:     dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
-;CHECK:     $xmm0 = COPY %62
-;CHECK:     $xmm1 = COPY %58
+;CHECK:     $xmm0 = COPY %68
+;CHECK:     $xmm1 = COPY %64
 ;CHECK:     $xmm2 = COPY %6
-;CHECK:     $xmm3 = COPY %45
-;CHECK:     $xmm4 = COPY %41
+;CHECK:     $xmm3 = COPY %52
+;CHECK:     $xmm4 = COPY %45
 ;CHECK:     $xmm5 = COPY %10
-;CHECK:     $xmm6 = COPY %71
-;CHECK:     $xmm7 = COPY %66
-;CHECK:     MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %49 :: (store (s64) into %stack.3)
-;CHECK:     MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %53 :: (store (s64) into %stack.4)
-;CHECK:     MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %76 :: (store (s64) into %stack.7)
+;CHECK:     $xmm6 = COPY %77
+;CHECK:     $xmm7 = COPY %72
+;CHECK:     MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %55 :: (store (s64) into %stack.3)
+;CHECK:     MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %59 :: (store (s64) into %stack.4)
+;CHECK:     MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %82 :: (store (s64) into %stack.7)
 ;CHECK:     STATEPOINT 2, 5, 9, undef %22:gr64, $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, 2, 0, 2, 0, 2, 59, 2, 0, 2, 2, 2, 0, 2, 70, 2, 0, 2, 26, 2, 0, 2, 0, 2, 0, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.0, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (load (s64) from %stack.1), (load (s64) from %stack.2), (load (s64) from %stack.3), (load (s64) from %stack.4), (load (s64) from %stack.5), (load (s64) from %stack.6), (load (s64) from %fixed-stack.2), (load (s64) from %fixed-stack.3, align 16), (load (s64) from %fixed-stack.0)
 ;CHECK:     ADJCALLSTACKUP64 8, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
 ;CHECK:   bb.3.bb15:
@@ -132,19 +132,19 @@ declare token @llvm.experimental.gc.statepoint.p0(i64 , i32 , ptr, i32 , i32 , .
 ;CHECK:   bb.5.bb21:
 ;CHECK:     successors:
 ;CHECK:     ADJCALLSTACKDOWN64 8, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-;CHECK:     %79:fr64 = MOVSDrm_alt %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7)
-;CHECK:     MOVSDmr $rsp, 1, $noreg, 0, $noreg, %79 :: (store (s64) into stack)
+;CHECK:     %85:fr64 = MOVSDrm_alt %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7)
+;CHECK:     MOVSDmr $rsp, 1, $noreg, 0, $noreg, %85 :: (store (s64) into stack)
 ;CHECK:     $xmm0 = MOVSDrm_alt %stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %stack.6)
 ;CHECK:     $xmm1 = MOVSDrm_alt %stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %stack.5)
 ;CHECK:     $xmm2 = MOVSDrm_alt %stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %stack.4)
 ;CHECK:     $xmm3 = MOVSDrm_alt %stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %stack.2)
 ;CHECK:     $xmm4 = MOVSDrm_alt %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1)
 ;CHECK:     $xmm5 = MOVSDrm_alt %stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %stack.3)
-;CHECK:     %74:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16)
-;CHECK:     $xmm6 = COPY %74
+;CHECK:     %80:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16)
+;CHECK:     $xmm6 = COPY %80
 ;CHECK:     $esi = MOV32ri 51
-;CHECK:     %69:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2)
-;CHECK:     $xmm7 = COPY %69
+;CHECK:     %75:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2)
+;CHECK:     $xmm7 = COPY %75
 ;CHECK:     STATEPOINT 2, 5, 10, undef %36:gr64, undef $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, killed $esi, 2, 0, 2, 0, 2, 105, 2, 0, 2, 2, 2, 0, 2, 97, 2, 0, 2, 26, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 2, 2, 2, 2, 46, 2, 0, 2, 20, 2, 0, 2, 0, 2, 4278124286, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.7, 0, 2, 7, 2, 0, 2, 3, 2, 51, 2, 1, 2, 4278124286, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (load (s64) from %stack.1), (load (s64) from %stack.2), (load (s64) from %stack.3), (load (s64) from %stack.4), (load (s64) from %stack.5), (load (s64) from %stack.6), (load (s64) from %fixed-stack.2), (load (s64) from %fixed-stack.3, align 16), (load (s64) from %stack.7)
 ;CHECK:     ADJCALLSTACKUP64 8, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
 ;CHECK:   bb.6.bb23 (landing-pad):

diff  --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
index 3e63abdcd2e2536..a33993444d02307 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
@@ -31,7 +31,7 @@ define i1 @test_relocate(ptr addrspace(1) %a) gc "statepoint-example" {
 ; CHECK-PREG-LABEL: name:            test_relocate
 ; CHECK-PREG:    renamable $rbx = COPY $rdi
 ; CHECK-PREG:    renamable $rbx = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, killed renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al
-; CHECK-PREG:    renamable $bpl = COPY killed $al
+; CHECK-PREG:    renamable $bpl = COPY $al
 ; CHECK-PREG:    $rdi = COPY killed renamable $rbx
 ; CHECK-PREG:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
 

diff  --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll
index 12b1f55fa762f6c..d9a8ab0ca8e3c29 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll
@@ -244,31 +244,30 @@ define i1 @test_cross_bb(ptr addrspace(1) %a, i1 %external_cond) gc "statepoint-
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %r14
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
 ; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset %rbx, -32
-; CHECK-NEXT:    .cfi_offset %r14, -24
+; CHECK-NEXT:    .cfi_offset %rbx, -24
 ; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movl %esi, %r14d
+; CHECK-NEXT:    movl %esi, %ebp
 ; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    callq return_i1 at PLT
 ; CHECK-NEXT:  .Ltmp7:
-; CHECK-NEXT:    testb $1, %r14b
+; CHECK-NEXT:    testb $1, %bpl
 ; CHECK-NEXT:    je .LBB7_2
 ; CHECK-NEXT:  # %bb.1: # %left
-; CHECK-NEXT:    movl %eax, %ebp
 ; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    movl %eax, %ebx
 ; CHECK-NEXT:    callq consume at PLT
-; CHECK-NEXT:    movl %ebp, %eax
+; CHECK-NEXT:    movl %ebx, %eax
 ; CHECK-NEXT:    jmp .LBB7_3
 ; CHECK-NEXT:  .LBB7_2: # %right
 ; CHECK-NEXT:    movb $1, %al
 ; CHECK-NEXT:  .LBB7_3: # %right
-; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8

diff  --git a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
index 9699c102c0b76b8..de1d822a8c8015f 100644
--- a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
+++ b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
@@ -19,8 +19,8 @@ target triple = "thumbv7k-apple-watchos2.0.0"
 ; Function Attrs: optsize ssp
 define i64 @_Z3foox(i64 returned) local_unnamed_addr #0 !dbg !13 {
   tail call void @llvm.dbg.value(metadata i64 %0, metadata !17, metadata !DIExpression()), !dbg !18
-  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r5
-  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r4
+  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r0
+  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r1
 
   %2 = load i64, ptr @g, align 8, !dbg !19, !tbaa !21
   %3 = icmp eq i64 %2, %0, !dbg !19

diff  --git a/llvm/test/DebugInfo/X86/live-debug-values.ll b/llvm/test/DebugInfo/X86/live-debug-values.ll
index db524d2b3bc98de..24441cc15ced50c 100644
--- a/llvm/test/DebugInfo/X86/live-debug-values.ll
+++ b/llvm/test/DebugInfo/X86/live-debug-values.ll
@@ -28,6 +28,9 @@
 ; This case will also produce multiple locations but only the debug range
 ; extension is tested here.
 
+; XFAIL: *
+; This test is failing after splitting the live range of variable "n" in %bb.3.
+
 ; DBG_VALUE for variable "n" is extended into %bb.5 from its predecessors %bb.3
 ; and %bb.4.
 ; CHECK:         .LBB0_5:

diff  --git a/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll b/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll
index 95399aa4d41d28c..adedb6b7a5abf81 100644
--- a/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll
+++ b/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll
@@ -17,13 +17,13 @@
 ; CHECK-NEXT:  <L2>:
 ; CHECK-NEXT:        60:      	bf	8, 0x84 <L1>
 ; CHECK-NEXT:  <L0>:
-; CHECK-NEXT:        64:      	mr	3, 31
-; CHECK-NEXT:        68:      	bl 0x0 <.internal>
-; CHECK-NEXT:        6c:      	mr	31, 3
-; CHECK-NEXT:        70:      	cmplwi	3, 11
-; CHECK-NEXT:        74:      	bt	0, 0x60 <L2>
-; CHECK-NEXT:        78:      	bl 0x0 <.internal>
-; CHECK-NEXT:        7c:      	nop
+; CHECK-NEXT:        64:      	bl 0x0 <.internal>
+; CHECK-NEXT:        68:      	cmplwi	3, 11
+; CHECK-NEXT:        6c:      	bt	0, 0x60 <L2>
+; CHECK-NEXT:        70:        mr      31, 3
+; CHECK-NEXT:        74:      	bl 0x0 <.internal>
+; CHECK-NEXT:        78:      	nop
+; CHECK-NEXT:        7c:        mr      3, 31
 ; CHECK-NEXT:        80:      	b 0x60 <L2>
 ; CHECK-NEXT:  <L1>:
 ; CHECK-NEXT:        84:      	lwz 31, 60(1)


        


More information about the llvm-commits mailing list