[clang] [llvm] [RegAlloc] [X86] Enable callee saved register optimization for x86 (PR #188609)

via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 17 09:30:41 PDT 2026


https://github.com/weiguozhi updated https://github.com/llvm/llvm-project/pull/188609

>From 16c7e2709fa2217137e9c0bd331bfaa92ab6c8ac Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Fri, 6 Mar 2026 11:00:11 -0800
Subject: [PATCH 1/4] [RegAlloc] [X86] Enable callee saved register
 optimization for x86

Enable the callee-saved register optimization implemented in
RAGreedy::tryAssignCSRFirstTime() for x86. It can replace the save/restore
instructions in the prologue/epilogue with register spills/reloads in cold
blocks or with register splits.
---
 clang/test/Frontend/stack-layout-remark.c     |  20 +--
 llvm/lib/CodeGen/RegAllocGreedy.cpp           |   2 +-
 llvm/lib/Target/X86/X86RegisterInfo.h         |   5 +
 .../CodeGen/MLRegAlloc/interactive-mode.ll    |   2 +-
 .../X86/2007-08-09-IllegalX86-64Asm.ll        |  57 ++++----
 .../CodeGen/X86/2007-11-30-LoadFolding-Bug.ll |  36 ++---
 llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll  |   2 +-
 llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll  |  66 ++++-----
 llvm/test/CodeGen/X86/atom-fixup-lea2.ll      |   2 +-
 llvm/test/CodeGen/X86/block-placement.ll      |   4 +-
 .../CodeGen/X86/callbr-asm-branch-folding.ll  |   5 +-
 llvm/test/CodeGen/X86/fptosi-sat-scalar.ll    | 120 ++++++++--------
 llvm/test/CodeGen/X86/fptoui-sat-scalar.ll    | 136 +++++++++---------
 .../X86/indirect-branch-tracking-eh.ll        |   1 -
 llvm/test/CodeGen/X86/no-split-size.ll        |   2 +-
 llvm/test/CodeGen/X86/ragreedy-bug.ll         |   2 +-
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll |  60 ++++----
 llvm/test/CodeGen/X86/sjlj-eh.ll              |   2 +-
 .../CodeGen/X86/speculative-load-hardening.ll | 133 ++++++++---------
 llvm/test/CodeGen/X86/split-reg-with-hint.ll  |   2 +-
 .../CodeGen/X86/statepoint-vreg-details.ll    |   6 +-
 .../CodeGen/X86/statepoint-vreg-invoke.ll     |   4 +-
 .../X86/tail-dup-merge-loop-headers.ll        |   2 +-
 llvm/test/CodeGen/X86/tail-opts.ll            |  63 ++++----
 llvm/test/CodeGen/X86/x86-shrink-wrapping.ll  |  38 ++---
 25 files changed, 382 insertions(+), 390 deletions(-)

diff --git a/clang/test/Frontend/stack-layout-remark.c b/clang/test/Frontend/stack-layout-remark.c
index b0ed03c80f24a..461312bd047c1 100644
--- a/clang/test/Frontend/stack-layout-remark.c
+++ b/clang/test/Frontend/stack-layout-remark.c
@@ -135,26 +135,28 @@ extern void use_dot_vector(struct Array *data);
 //      O0-DEBUG: Function: do_work
 // O0-DEBUG-NEXT: Offset: [SP-4], Type: Variable, Align: 4, Size: 4
 // O0-DEBUG-NEXT: Offset: [SP-16], Type: Variable, Align: 8, Size: 8
-// O0-DEBUG-NEXT:     A @ {{.*}}stack-layout-remark.c:[[# @LINE + 20]]
+// O0-DEBUG-NEXT:     A @ {{.*}}stack-layout-remark.c:[[# @LINE + 22]]
 // O0-DEBUG-NEXT: Offset: [SP-24], Type: Variable, Align: 8, Size: 8
-// O0-DEBUG-NEXT:     B @ {{.*}}stack-layout-remark.c:[[# @LINE + 18]]
+// O0-DEBUG-NEXT:     B @ {{.*}}stack-layout-remark.c:[[# @LINE + 20]]
 // O0-DEBUG-NEXT: Offset: [SP-32], Type: Variable, Align: 8, Size: 8
-// O0-DEBUG-NEXT:     out @ {{.*}}stack-layout-remark.c:[[# @LINE + 16]]
+// O0-DEBUG-NEXT:     out @ {{.*}}stack-layout-remark.c:[[# @LINE + 18]]
 // O0-DEBUG-NEXT: Offset: [SP-36], Type: Variable, Align: 4, Size: 4
-// O0-DEBUG-NEXT:     len @ {{.*}}stack-layout-remark.c:[[# @LINE + 19]]
+// O0-DEBUG-NEXT:     len @ {{.*}}stack-layout-remark.c:[[# @LINE + 21]]
 // O0-DEBUG-NEXT: Offset: [SP-48], Type: Variable, Align: 8, Size: 8
-// O0-DEBUG-NEXT:     AB @ {{.*}}stack-layout-remark.c:[[# @LINE + 18]]
+// O0-DEBUG-NEXT:     AB @ {{.*}}stack-layout-remark.c:[[# @LINE + 20]]
 // O0-DEBUG-NEXT: Offset: [SP-52], Type: Variable, Align: 4, Size: 4
-// O0-DEBUG-NEXT:     sum @ {{.*}}stack-layout-remark.c:[[# @LINE + 32]]
+// O0-DEBUG-NEXT:     sum @ {{.*}}stack-layout-remark.c:[[# @LINE + 34]]
 // O0-DEBUG-NEXT: Offset: [SP-56], Type: Variable, Align: 4, Size: 4
-// O0-DEBUG-NEXT:     i @ {{.*}}stack-layout-remark.c:[[# @LINE + 31]]
+// O0-DEBUG-NEXT:     i @ {{.*}}stack-layout-remark.c:[[# @LINE + 33]]
 
 //      O3-DEBUG: Function: do_work
 // O3-DEBUG-NEXT: Offset: [SP-8], Type: Spill, Align: 16, Size: 8
 // O3-DEBUG-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
-// O3-DEBUG-NEXT: Offset: [SP-24], Type: Spill, Align: 16, Size: 8
+// O3-DEBUG-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
+// O3-DEBUG-NEXT:     B @ {{.*}}stack-layout-remark.c:[[# @LINE + 4]]
 // O3-DEBUG-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
-// O3-DEBUG-NEXT: Offset: [SP-40], Type: Spill, Align: 16, Size: 8
+// O3-DEBUG-NEXT:     A @ {{.*}}stack-layout-remark.c:[[# @LINE + 2]]
+// O3-DEBUG-NEXT: Offset: [SP-40], Type: Spill, Align: 8, Size: 8
 int do_work(struct Array *A, struct Array *B, struct Result *out) {
   if (!A || !B)
     return -1;
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 6b8a9b8190f9a..f2f1897795890 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -118,7 +118,7 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
 static cl::opt<unsigned> CSRCostScale(
     "regalloc-csr-cost-scale",
     cl::desc("Scale for the callee-saved register cost, in percentage."),
-    cl::init(80), cl::Hidden);
+    cl::init(30), cl::Hidden);
 
 static cl::opt<unsigned long> GrowRegionComplexityBudget(
     "grow-region-complexity-budget",
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index e646591663aca..134089ed91f8d 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -182,6 +182,11 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
   bool requiresRegisterScavenging(const MachineFunction &MF) const override {
     return true;
   }
+
+  unsigned getCSRFirstUseCost() const override {
+    // push + pop.
+    return 2;
+  }
 };
 
 } // End llvm namespace
diff --git a/llvm/test/CodeGen/MLRegAlloc/interactive-mode.ll b/llvm/test/CodeGen/MLRegAlloc/interactive-mode.ll
index 51af5891c4e0f..881920e1f6575 100644
--- a/llvm/test/CodeGen/MLRegAlloc/interactive-mode.ll
+++ b/llvm/test/CodeGen/MLRegAlloc/interactive-mode.ll
@@ -7,7 +7,7 @@
 ; RUN: cp %S/Inputs/interactive_main.py %t.rundir
 ; RUN: %python %t.rundir/interactive_main.py %t.channel-basename \
 ; RUN:    llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=release -interactive-model-runner-echo-reply \
-; RUN:    -regalloc-evict-interactive-channel-base=%t.channel-basename %S/Inputs/two-large-fcts.ll -o /dev/null | FileCheck %s
+; RUN:    -regalloc-evict-interactive-channel-base=%t.channel-basename -regalloc-csr-cost-scale=0 %S/Inputs/two-large-fcts.ll -o /dev/null | FileCheck %s
 
 ;; Make sure we see both contexts. Also sanity-check that the advice is the
 ;; expected one - the index of the first legal register
diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index 7bdc4e19a1cf6..e264e74fee9d3 100644
--- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -24,20 +24,14 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %r15
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
 ; CHECK-NEXT:    pushq %r14
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    pushq %r12
-; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    subq $32, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset %rbx, -48
-; CHECK-NEXT:    .cfi_offset %r12, -40
-; CHECK-NEXT:    .cfi_offset %r14, -32
-; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset %rbx, -32
+; CHECK-NEXT:    .cfi_offset %r14, -24
 ; CHECK-NEXT:    .cfi_offset %rbp, -16
 ; CHECK-NEXT:    movq %rsi, %rbx
 ; CHECK-NEXT:    movq %rdi, %r14
@@ -80,7 +74,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
 ; CHECK-NEXT:    movzbl %sil, %ecx
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    divb %dl
-; CHECK-NEXT:    movl %eax, %r15d
+; CHECK-NEXT:    ## kill: def $al killed $al def $eax
 ; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    jne LBB0_12
 ; CHECK-NEXT:    jmp LBB0_14
@@ -103,32 +97,34 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
 ; CHECK-NEXT:    callq _feraiseexcept
 ; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
 ; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
-; CHECK-NEXT:    xorl %r15d, %r15d
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %sil, %sil
 ; CHECK-NEXT:    je LBB0_14
 ; CHECK-NEXT:  LBB0_12: ## %cond_false.i
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    je LBB0_14
 ; CHECK-NEXT:  ## %bb.13: ## %cond_next17.i
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT:    movzbl %sil, %eax
 ; CHECK-NEXT:    divb %dl
-; CHECK-NEXT:    movzbl %ah, %ebx
+; CHECK-NEXT:    movzbl %ah, %ebp
 ; CHECK-NEXT:    jmp LBB0_18
 ; CHECK-NEXT:  LBB0_14: ## %cond_true.i200
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_17
 ; CHECK-NEXT:  ## %bb.16: ## %cond_true14.i
 ; CHECK-NEXT:    movl $4, %edi
 ; CHECK-NEXT:    callq _feraiseexcept
 ; CHECK-NEXT:  LBB0_17: ## %ubyte_ctype_remainder.exit
-; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    xorl %ebp, %ebp
 ; CHECK-NEXT:  LBB0_18: ## %ubyte_ctype_remainder.exit
 ; CHECK-NEXT:    movq (%r14), %rax
 ; CHECK-NEXT:    callq *224(%rax)
 ; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    je LBB0_21
 ; CHECK-NEXT:  ## %bb.19: ## %cond_true61
-; CHECK-NEXT:    movl %eax, %ebp
+; CHECK-NEXT:    movl %eax, %ebx
 ; CHECK-NEXT:    movq (%r14), %rax
 ; CHECK-NEXT:    movq _.str5@GOTPCREL(%rip), %rdi
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
@@ -143,7 +139,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
 ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %edi
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT:    movl %ebp, %edx
+; CHECK-NEXT:    movl %ebx, %edx
 ; CHECK-NEXT:    callq *232(%rax)
 ; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    jne LBB0_27
@@ -153,43 +149,42 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
 ; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    je LBB0_27
 ; CHECK-NEXT:  ## %bb.22: ## %cond_next97
-; CHECK-NEXT:    movq %rax, %r14
-; CHECK-NEXT:    movq _PyArray_API@GOTPCREL(%rip), %r12
-; CHECK-NEXT:    movq (%r12), %rax
+; CHECK-NEXT:    movq %rax, %rbx
+; CHECK-NEXT:    movq _PyArray_API@GOTPCREL(%rip), %r14
+; CHECK-NEXT:    movq (%r14), %rax
 ; CHECK-NEXT:    movq 200(%rax), %rdi
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    callq *304(%rdi)
 ; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    je LBB0_25
 ; CHECK-NEXT:  ## %bb.23: ## %cond_next135
-; CHECK-NEXT:    movb %r15b, 16(%rax)
-; CHECK-NEXT:    movq %rax, 24(%r14)
-; CHECK-NEXT:    movq (%r12), %rax
+; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-NEXT:    movb %cl, 16(%rax)
+; CHECK-NEXT:    movq %rax, 24(%rbx)
+; CHECK-NEXT:    movq (%r14), %rax
 ; CHECK-NEXT:    movq 200(%rax), %rdi
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    callq *304(%rdi)
 ; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    je LBB0_25
 ; CHECK-NEXT:  ## %bb.24: ## %cond_next182
-; CHECK-NEXT:    movb %bl, 16(%rax)
-; CHECK-NEXT:    movq %rax, 32(%r14)
-; CHECK-NEXT:    movq %r14, %rax
+; CHECK-NEXT:    movb %bpl, 16(%rax)
+; CHECK-NEXT:    movq %rax, 32(%rbx)
+; CHECK-NEXT:    movq %rbx, %rax
 ; CHECK-NEXT:    jmp LBB0_28
 ; CHECK-NEXT:  LBB0_25: ## %cond_true113
-; CHECK-NEXT:    decq (%r14)
+; CHECK-NEXT:    decq (%rbx)
 ; CHECK-NEXT:    jne LBB0_27
 ; CHECK-NEXT:  ## %bb.26: ## %cond_true126
-; CHECK-NEXT:    movq 8(%r14), %rax
-; CHECK-NEXT:    movq %r14, %rdi
+; CHECK-NEXT:    movq 8(%rbx), %rax
+; CHECK-NEXT:    movq %rbx, %rdi
 ; CHECK-NEXT:    callq *48(%rax)
 ; CHECK-NEXT:  LBB0_27: ## %UnifiedReturnBlock
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:  LBB0_28: ## %UnifiedReturnBlock
 ; CHECK-NEXT:    addq $32, %rsp
 ; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    popq %r12
 ; CHECK-NEXT:    popq %r14
-; CHECK-NEXT:    popq %r15
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    retq
 entry:
diff --git a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 6541693776099..65c291ab5a748 100644
--- a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -14,13 +14,12 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    movl $1, %ebx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    movl $1, %esi
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1: # %bb.i5
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    addl %ebx, %ebx
+; CHECK-NEXT:    addl %esi, %esi
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb $1, %cl
 ; CHECK-NEXT:    jne .LBB0_1
@@ -34,17 +33,17 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    andl $1, %ebp
 ; CHECK-NEXT:    xorpd %xmm0, %xmm0
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:    xorpd %xmm1, %xmm1
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_7: # %bb.i28.i
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    cvttsd2si %xmm1, %edi
-; CHECK-NEXT:    cmpl %edx, %edi
-; CHECK-NEXT:    cmovgel %eax, %edi
-; CHECK-NEXT:    addl $2, %ecx
+; CHECK-NEXT:    cvttsd2si %xmm1, %ecx
+; CHECK-NEXT:    cmpl %edx, %ecx
+; CHECK-NEXT:    cmovgel %eax, %ecx
+; CHECK-NEXT:    addl $2, %edi
 ; CHECK-NEXT:    xorps %xmm2, %xmm2
-; CHECK-NEXT:    cvtsi2sd %edi, %xmm2
+; CHECK-NEXT:    cvtsi2sd %ecx, %xmm2
 ; CHECK-NEXT:    xorpd %xmm1, %xmm1
 ; CHECK-NEXT:    subsd %xmm2, %xmm1
 ; CHECK-NEXT:    mulsd %xmm0, %xmm1
@@ -54,9 +53,11 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    movl $0, 0
 ; CHECK-NEXT:    je .LBB0_9
 ; CHECK-NEXT:  # %bb.10: # %mp_sqrt_init.exit
+; CHECK-NEXT:    movl %esi, (%esp) # 4-byte Spill
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    movl %edx, %edi
-; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    movl %ebx, %edx
 ; CHECK-NEXT:    calll mp_mul_csqu@PLT
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    movl $-1, %edx
@@ -66,11 +67,11 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    calll rdft@PLT
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; CHECK-NEXT:    movl %edi, %esi
 ; CHECK-NEXT:    movl %edi, %edx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; CHECK-NEXT:    pushl %edi
-; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl $0
 ; CHECK-NEXT:    calll mp_mul_d2i@PLT
 ; CHECK-NEXT:    addl $12, %esp
@@ -90,7 +91,6 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    jmp .LBB0_9
 ; CHECK-NEXT:  .LBB0_11: # %cond_false.i
 ; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    movl (%esp), %esi # 4-byte Reload
 ; CHECK-NEXT:    movl %esi, %edx
 ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    pushl $0
@@ -98,10 +98,10 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    addl $8, %esp
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl %edi
-; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    calll mp_add@PLT
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    xorl %ecx, %ecx
@@ -113,7 +113,7 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl $0
 ; CHECK-NEXT:    calll mp_round@PLT
 ; CHECK-NEXT:    addl $8, %esp
@@ -121,7 +121,7 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    movl %esi, %edx
 ; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-NEXT:    pushl %ebx
+; CHECK-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; CHECK-NEXT:    calll mp_mul_d2i@PLT
 ; CHECK-NEXT:    addl $16, %esp
 ; CHECK-NEXT:    jmp .LBB0_4
diff --git a/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index b32afdc2214e0..78b369599cd2f 100644
--- a/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts -regalloc-csr-cost-scale=0 | FileCheck %s
 
 	%struct.DBC_t = type { i32, ptr, i16, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, ptr, i8, i16, ptr, i16 }
 	%struct.DRVOPT = type { i16, i32, i8, ptr }
diff --git a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
index 254f824379d56..dde3e36fa1a2d 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
@@ -32,18 +32,18 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-NEXT:    movw %bx, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb %bpl, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    ldtilecfg (%rsp)
-; CHECK-NEXT:    movl $32, %r14d
-; CHECK-NEXT:    movl $buf+2048, %r15d
-; CHECK-NEXT:    tileloadd (%r15,%r14), %tmm5
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    movl $32, %eax
+; CHECK-NEXT:    movl $buf+2048, %r14d
+; CHECK-NEXT:    tileloadd (%r14,%rax), %tmm5
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %if.true
-; CHECK-NEXT:    movl $buf, %eax
-; CHECK-NEXT:    movw $8, %cx
-; CHECK-NEXT:    tileloadd (%rax,%r14), %tmm0
-; CHECK-NEXT:    movl $buf+1024, %eax
-; CHECK-NEXT:    tileloadd (%rax,%r14), %tmm1
+; CHECK-NEXT:    movl $buf, %ecx
+; CHECK-NEXT:    movw $8, %dx
+; CHECK-NEXT:    tileloadd (%rcx,%rax), %tmm0
+; CHECK-NEXT:    movl $buf+1024, %ecx
+; CHECK-NEXT:    tileloadd (%rcx,%rax), %tmm1
 ; CHECK-NEXT:    movabsq $64, %rax
 ; CHECK-NEXT:    tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
 ; CHECK-NEXT:    tdpbssd %tmm1, %tmm0, %tmm5
@@ -56,22 +56,23 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-NEXT:    tileloadd 64(%rsp,%rax), %tmm6 # 1024-byte Folded Reload
 ; CHECK-NEXT:    jmp .LBB0_3
 ; CHECK-NEXT:  .LBB0_2: # %if.false
-; CHECK-NEXT:    movl $buf, %eax
-; CHECK-NEXT:    movw $8, %cx
-; CHECK-NEXT:    tileloadd (%rax,%r14), %tmm2
-; CHECK-NEXT:    movl $buf+1024, %eax
-; CHECK-NEXT:    tileloadd (%rax,%r14), %tmm3
+; CHECK-NEXT:    movl $buf, %ecx
+; CHECK-NEXT:    movw $8, %dx
+; CHECK-NEXT:    tileloadd (%rcx,%rax), %tmm2
+; CHECK-NEXT:    movl $buf+1024, %ecx
+; CHECK-NEXT:    tileloadd (%rcx,%rax), %tmm3
 ; CHECK-NEXT:    movabsq $64, %rax
 ; CHECK-NEXT:    tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
 ; CHECK-NEXT:    tdpbssd %tmm3, %tmm2, %tmm5
 ; CHECK-NEXT:    tilestored %tmm5, 64(%rsp,%rax) # 1024-byte Folded Spill
+; CHECK-NEXT:    movl $32, %r15d
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    callq foo
 ; CHECK-NEXT:    ldtilecfg (%rsp)
 ; CHECK-NEXT:    movabsq $64, %rax
 ; CHECK-NEXT:    tileloadd 64(%rsp,%rax), %tmm6 # 1024-byte Folded Reload
-; CHECK-NEXT:    tilestored %tmm6, (%r15,%r14)
+; CHECK-NEXT:    tilestored %tmm6, (%r14,%r15)
 ; CHECK-NEXT:  .LBB0_3: # %exit
 ; CHECK-NEXT:    movl $buf, %eax
 ; CHECK-NEXT:    movl $32, %ecx
@@ -118,22 +119,22 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
 ; EGPR-NEXT:    movw %bx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x5c,0x24,0x12]
 ; EGPR-NEXT:    movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x30]
 ; EGPR-NEXT:    ldtilecfg (%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x04,0x24]
-; EGPR-NEXT:    movl $32, %r14d # encoding: [0x41,0xbe,0x20,0x00,0x00,0x00]
-; EGPR-NEXT:    movl $buf+2048, %r15d # encoding: [0x41,0xbf,A,A,A,A]
+; EGPR-NEXT:    movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; EGPR-NEXT:    movl $buf+2048, %r14d # encoding: [0x41,0xbe,A,A,A,A]
 ; EGPR-NEXT:    # fixup A - offset: 2, value: buf+2048, kind: FK_Data_4
-; EGPR-NEXT:    tileloadd (%r15,%r14), %tmm5 # EVEX TO VEX Compression encoding: [0xc4,0x82,0x7b,0x4b,0x2c,0x37]
-; EGPR-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; EGPR-NEXT:    testb %al, %al # encoding: [0x84,0xc0]
+; EGPR-NEXT:    tileloadd (%r14,%rax), %tmm5 # EVEX TO VEX Compression encoding: [0xc4,0xc2,0x7b,0x4b,0x2c,0x06]
+; EGPR-NEXT:    xorl %ecx, %ecx # encoding: [0x31,0xc9]
+; EGPR-NEXT:    testb %cl, %cl # encoding: [0x84,0xc9]
 ; EGPR-NEXT:    jne .LBB0_2 # encoding: [0x75,A]
 ; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB0_2, kind: FK_PCRel_1
 ; EGPR-NEXT:  # %bb.1: # %if.true
-; EGPR-NEXT:    movl $buf, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT:    movl $buf, %ecx # encoding: [0xb9,A,A,A,A]
 ; EGPR-NEXT:    # fixup A - offset: 1, value: buf, kind: FK_Data_4
-; EGPR-NEXT:    movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00]
-; EGPR-NEXT:    tileloadd (%rax,%r14), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x30]
-; EGPR-NEXT:    movl $buf+1024, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT:    movw $8, %dx # encoding: [0x66,0xba,0x08,0x00]
+; EGPR-NEXT:    tileloadd (%rcx,%rax), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x01]
+; EGPR-NEXT:    movl $buf+1024, %ecx # encoding: [0xb9,A,A,A,A]
 ; EGPR-NEXT:    # fixup A - offset: 1, value: buf+1024, kind: FK_Data_4
-; EGPR-NEXT:    tileloadd (%rax,%r14), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x30]
+; EGPR-NEXT:    tileloadd (%rcx,%rax), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x0c,0x01]
 ; EGPR-NEXT:    movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 ; EGPR-NEXT:    tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
 ; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0xac,0x04,0x40,0x04,0x00,0x00]
@@ -151,19 +152,20 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
 ; EGPR-NEXT:    jmp .LBB0_3 # encoding: [0xeb,A]
 ; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB0_3, kind: FK_PCRel_1
 ; EGPR-NEXT:  .LBB0_2: # %if.false
-; EGPR-NEXT:    movl $buf, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT:    movl $buf, %ecx # encoding: [0xb9,A,A,A,A]
 ; EGPR-NEXT:    # fixup A - offset: 1, value: buf, kind: FK_Data_4
-; EGPR-NEXT:    movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00]
-; EGPR-NEXT:    tileloadd (%rax,%r14), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x14,0x30]
-; EGPR-NEXT:    movl $buf+1024, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT:    movw $8, %dx # encoding: [0x66,0xba,0x08,0x00]
+; EGPR-NEXT:    tileloadd (%rcx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x14,0x01]
+; EGPR-NEXT:    movl $buf+1024, %ecx # encoding: [0xb9,A,A,A,A]
 ; EGPR-NEXT:    # fixup A - offset: 1, value: buf+1024, kind: FK_Data_4
-; EGPR-NEXT:    tileloadd (%rax,%r14), %tmm3 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x1c,0x30]
+; EGPR-NEXT:    tileloadd (%rcx,%rax), %tmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x01]
 ; EGPR-NEXT:    movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 ; EGPR-NEXT:    tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
 ; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0xac,0x04,0x40,0x04,0x00,0x00]
 ; EGPR-NEXT:    tdpbssd %tmm3, %tmm2, %tmm5 # encoding: [0xc4,0xe2,0x63,0x5e,0xea]
 ; EGPR-NEXT:    tilestored %tmm5, 64(%rsp,%rax) # 1024-byte Folded Spill
 ; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x6c,0x04,0x40]
+; EGPR-NEXT:    movl $32, %r15d # encoding: [0x41,0xbf,0x20,0x00,0x00,0x00]
 ; EGPR-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
 ; EGPR-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; EGPR-NEXT:    callq foo # encoding: [0xe8,A,A,A,A]
@@ -172,7 +174,7 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
 ; EGPR-NEXT:    movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 ; EGPR-NEXT:    tileloadd 64(%rsp,%rax), %tmm6 # 1024-byte Folded Reload
 ; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x74,0x04,0x40]
-; EGPR-NEXT:    tilestored %tmm6, (%r15,%r14) # EVEX TO VEX Compression encoding: [0xc4,0x82,0x7a,0x4b,0x34,0x37]
+; EGPR-NEXT:    tilestored %tmm6, (%r14,%r15) # EVEX TO VEX Compression encoding: [0xc4,0x82,0x7a,0x4b,0x34,0x3e]
 ; EGPR-NEXT:  .LBB0_3: # %exit
 ; EGPR-NEXT:    movl $buf, %eax # encoding: [0xb8,A,A,A,A]
 ; EGPR-NEXT:    # fixup A - offset: 1, value: buf, kind: FK_Data_4
diff --git a/llvm/test/CodeGen/X86/atom-fixup-lea2.ll b/llvm/test/CodeGen/X86/atom-fixup-lea2.ll
index adea9e1386a90..20519a9974542 100644
--- a/llvm/test/CodeGen/X86/atom-fixup-lea2.ll
+++ b/llvm/test/CodeGen/X86/atom-fixup-lea2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
 
 ; CHECK:%bb.5
-; CHECK-NEXT:leal
+; CHECK-NEXT:addl
 ; CHECK-NEXT:leal
 ; CHECK-NEXT:leal
 ; CHECK-NEXT:movl
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index 1369131413053..df682a5eb6f56 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -1057,6 +1057,7 @@ while.end:
 }
 
 declare void @cold_function() cold
+declare i32 @foo_function(i32)
 
 define i32 @test_cold_calls(ptr %a) {
 ; Test that edges to blocks post-dominated by cold calls are
@@ -1085,7 +1086,8 @@ else:
 
 exit:
   %ret = phi i32 [ %val1, %then ], [ %val2, %else ]
-  ret i32 %ret
+  %ret2 = call i32 @foo_function(i32 %ret)
+  ret i32 %ret2
 }
 
 ; Make sure we put landingpads out of the way.
diff --git a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll
index 3d389523dffb3..f90e05203fce5 100644
--- a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll
@@ -17,7 +17,7 @@ define dso_local void @n(ptr %o, i32 %p, i32 %u) nounwind {
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    movl %edx, %ebp
-; CHECK-NEXT:    movl %esi, %r12d
+; CHECK-NEXT:    movl %esi, %r15d
 ; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    callq c
 ; CHECK-NEXT:    movl %eax, %r14d
@@ -26,12 +26,13 @@ define dso_local void @n(ptr %o, i32 %p, i32 %u) nounwind {
 ; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    jne .LBB0_9
 ; CHECK-NEXT:  # %bb.1: # %if.end
+; CHECK-NEXT:    movl %r15d, %eax
 ; CHECK-NEXT:    cmpl $0, e(%rip)
 ; CHECK-NEXT:    # implicit-def: $r15d
 ; CHECK-NEXT:    # implicit-def: $r13d
 ; CHECK-NEXT:    je .LBB0_4
 ; CHECK-NEXT:  # %bb.2: # %if.then4
-; CHECK-NEXT:    movslq %r12d, %rdi
+; CHECK-NEXT:    movslq %eax, %rdi
 ; CHECK-NEXT:    callq m
 ; CHECK-NEXT:    # implicit-def: $r15d
 ; CHECK-NEXT:    # implicit-def: $r12d
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
index 9b7a43a29a942..0088a92f271c5 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
@@ -706,10 +706,11 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
 ; X86-X87-NEXT:    xorl %edx, %edx
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $-8, %ebx
+; X86-X87-NEXT:    movl $-8, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jb .LBB8_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:  .LBB8_2:
 ; X86-X87-NEXT:    movl $0, %ecx
 ; X86-X87-NEXT:    movl $0, %ebp
@@ -730,13 +731,13 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    ja .LBB8_8
 ; X86-X87-NEXT:  # %bb.7:
-; X86-X87-NEXT:    movl %edi, %eax
-; X86-X87-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %ebp, %eax
 ; X86-X87-NEXT:    movl %ecx, %esi
 ; X86-X87-NEXT:  .LBB8_8:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -744,7 +745,7 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
 ; X86-X87-NEXT:    movl $7, %edi
 ; X86-X87-NEXT:    ja .LBB8_10
 ; X86-X87-NEXT:  # %bb.9:
-; X86-X87-NEXT:    movl %ebx, %edi
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB8_10:
 ; X86-X87-NEXT:    fucomp %st(0)
 ; X86-X87-NEXT:    fnstsw %ax
@@ -875,8 +876,8 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $0, %eax
-; X86-X87-NEXT:    movl $0, %ebx
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jae .LBB9_1
 ; X86-X87-NEXT:  # %bb.2:
 ; X86-X87-NEXT:    movl $0, %edx
@@ -887,7 +888,6 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; X86-X87-NEXT:  .LBB9_5:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:  .LBB9_6:
-; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X86-X87-NEXT:    fucom %st(1)
@@ -903,13 +903,13 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:    movl $-1, %ebp
-; X86-X87-NEXT:    movl $-1, %edi
 ; X86-X87-NEXT:    movl $-1, %esi
+; X86-X87-NEXT:    movl $-1, %edi
 ; X86-X87-NEXT:    ja .LBB9_10
 ; X86-X87-NEXT:  # %bb.9:
 ; X86-X87-NEXT:    movl %edx, %ebp
-; X86-X87-NEXT:    movl %ebx, %edi
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB9_10:
 ; X86-X87-NEXT:    fucomp %st(0)
 ; X86-X87-NEXT:    fnstsw %ax
@@ -920,8 +920,8 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jp .LBB9_12
 ; X86-X87-NEXT:  # %bb.11:
-; X86-X87-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-X87-NEXT:    movl %edi, %eax
+; X86-X87-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %esi, %eax
 ; X86-X87-NEXT:    movl %ebp, %edx
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X86-X87-NEXT:  .LBB9_12:
@@ -939,7 +939,9 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; X86-X87-NEXT:    retl $4
 ; X86-X87-NEXT:  .LBB9_1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl $0, %edx
 ; X86-X87-NEXT:    jb .LBB9_4
 ; X86-X87-NEXT:  .LBB9_3:
@@ -1715,10 +1717,11 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
 ; X86-X87-NEXT:    xorl %edx, %edx
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $-8, %ebx
+; X86-X87-NEXT:    movl $-8, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jb .LBB18_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:  .LBB18_2:
 ; X86-X87-NEXT:    movl $0, %ecx
 ; X86-X87-NEXT:    movl $0, %ebp
@@ -1739,13 +1742,13 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    ja .LBB18_8
 ; X86-X87-NEXT:  # %bb.7:
-; X86-X87-NEXT:    movl %edi, %eax
-; X86-X87-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %ebp, %eax
 ; X86-X87-NEXT:    movl %ecx, %esi
 ; X86-X87-NEXT:  .LBB18_8:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1753,7 +1756,7 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
 ; X86-X87-NEXT:    movl $7, %edi
 ; X86-X87-NEXT:    ja .LBB18_10
 ; X86-X87-NEXT:  # %bb.9:
-; X86-X87-NEXT:    movl %ebx, %edi
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB18_10:
 ; X86-X87-NEXT:    fucomp %st(0)
 ; X86-X87-NEXT:    fnstsw %ax
@@ -1884,8 +1887,8 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
 ; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $0, %eax
-; X86-X87-NEXT:    movl $0, %ebx
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jae .LBB19_1
 ; X86-X87-NEXT:  # %bb.2:
 ; X86-X87-NEXT:    movl $0, %edx
@@ -1896,7 +1899,6 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
 ; X86-X87-NEXT:  .LBB19_5:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:  .LBB19_6:
-; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    fldl {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
 ; X86-X87-NEXT:    fucom %st(1)
@@ -1912,13 +1914,13 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:    movl $-1, %ebp
-; X86-X87-NEXT:    movl $-1, %edi
 ; X86-X87-NEXT:    movl $-1, %esi
+; X86-X87-NEXT:    movl $-1, %edi
 ; X86-X87-NEXT:    ja .LBB19_10
 ; X86-X87-NEXT:  # %bb.9:
 ; X86-X87-NEXT:    movl %edx, %ebp
-; X86-X87-NEXT:    movl %ebx, %edi
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB19_10:
 ; X86-X87-NEXT:    fucomp %st(0)
 ; X86-X87-NEXT:    fnstsw %ax
@@ -1929,8 +1931,8 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jp .LBB19_12
 ; X86-X87-NEXT:  # %bb.11:
-; X86-X87-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-X87-NEXT:    movl %edi, %eax
+; X86-X87-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %esi, %eax
 ; X86-X87-NEXT:    movl %ebp, %edx
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X86-X87-NEXT:  .LBB19_12:
@@ -1948,7 +1950,9 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
 ; X86-X87-NEXT:    retl $4
 ; X86-X87-NEXT:  .LBB19_1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl $0, %edx
 ; X86-X87-NEXT:    jb .LBB19_4
 ; X86-X87-NEXT:  .LBB19_3:
@@ -2857,10 +2861,11 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
 ; X86-X87-NEXT:    xorl %edx, %edx
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $-8, %ebx
+; X86-X87-NEXT:    movl $-8, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jb .LBB28_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:  .LBB28_2:
 ; X86-X87-NEXT:    movl $0, %ecx
 ; X86-X87-NEXT:    movl $0, %ebp
@@ -2881,13 +2886,13 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    ja .LBB28_8
 ; X86-X87-NEXT:  # %bb.7:
-; X86-X87-NEXT:    movl %edi, %eax
-; X86-X87-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %ebp, %eax
 ; X86-X87-NEXT:    movl %ecx, %esi
 ; X86-X87-NEXT:  .LBB28_8:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -2895,7 +2900,7 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
 ; X86-X87-NEXT:    movl $7, %edi
 ; X86-X87-NEXT:    ja .LBB28_10
 ; X86-X87-NEXT:  # %bb.9:
-; X86-X87-NEXT:    movl %ebx, %edi
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB28_10:
 ; X86-X87-NEXT:    fucomp %st(0)
 ; X86-X87-NEXT:    fnstsw %ax
@@ -3036,8 +3041,8 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
 ; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $0, %eax
-; X86-X87-NEXT:    movl $0, %ebx
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jae .LBB29_1
 ; X86-X87-NEXT:  # %bb.2:
 ; X86-X87-NEXT:    movl $0, %edx
@@ -3048,7 +3053,6 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
 ; X86-X87-NEXT:  .LBB29_5:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:  .LBB29_6:
-; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X86-X87-NEXT:    fucom %st(1)
@@ -3064,13 +3068,13 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:    movl $-1, %ebp
-; X86-X87-NEXT:    movl $-1, %edi
 ; X86-X87-NEXT:    movl $-1, %esi
+; X86-X87-NEXT:    movl $-1, %edi
 ; X86-X87-NEXT:    ja .LBB29_10
 ; X86-X87-NEXT:  # %bb.9:
 ; X86-X87-NEXT:    movl %edx, %ebp
-; X86-X87-NEXT:    movl %ebx, %edi
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB29_10:
 ; X86-X87-NEXT:    fucomp %st(0)
 ; X86-X87-NEXT:    fnstsw %ax
@@ -3081,8 +3085,8 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jp .LBB29_12
 ; X86-X87-NEXT:  # %bb.11:
-; X86-X87-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-X87-NEXT:    movl %edi, %eax
+; X86-X87-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %esi, %eax
 ; X86-X87-NEXT:    movl %ebp, %edx
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X86-X87-NEXT:  .LBB29_12:
@@ -3100,7 +3104,9 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
 ; X86-X87-NEXT:    retl $4
 ; X86-X87-NEXT:  .LBB29_1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl $0, %edx
 ; X86-X87-NEXT:    jb .LBB29_4
 ; X86-X87-NEXT:  .LBB29_3:
@@ -4188,10 +4194,11 @@ define i100 @test_signed_i100_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    xorl %edx, %edx
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $-8, %ebx
+; X86-X87-NEXT:    movl $-8, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jb .LBB38_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:  .LBB38_2:
 ; X86-X87-NEXT:    movl $0, %ecx
 ; X86-X87-NEXT:    movl $0, %ebp
@@ -4212,13 +4219,13 @@ define i100 @test_signed_i100_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    ja .LBB38_8
 ; X86-X87-NEXT:  # %bb.7:
-; X86-X87-NEXT:    movl %edi, %eax
-; X86-X87-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %ebp, %eax
 ; X86-X87-NEXT:    movl %ecx, %esi
 ; X86-X87-NEXT:  .LBB38_8:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -4226,7 +4233,7 @@ define i100 @test_signed_i100_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    movl $7, %edi
 ; X86-X87-NEXT:    ja .LBB38_10
 ; X86-X87-NEXT:  # %bb.9:
-; X86-X87-NEXT:    movl %ebx, %edi
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB38_10:
 ; X86-X87-NEXT:    fucomp %st(0)
 ; X86-X87-NEXT:    fnstsw %ax
@@ -4374,8 +4381,8 @@ define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movl $0, %eax
-; X86-X87-NEXT:    movl $0, %ebx
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jae .LBB39_1
 ; X86-X87-NEXT:  # %bb.2:
 ; X86-X87-NEXT:    movl $0, %edx
@@ -4386,7 +4393,6 @@ define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:  .LBB39_5:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:  .LBB39_6:
-; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    fldt {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
 ; X86-X87-NEXT:    fucom %st(1)
@@ -4402,13 +4408,13 @@ define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:    movl $-1, %ebp
-; X86-X87-NEXT:    movl $-1, %edi
 ; X86-X87-NEXT:    movl $-1, %esi
+; X86-X87-NEXT:    movl $-1, %edi
 ; X86-X87-NEXT:    ja .LBB39_10
 ; X86-X87-NEXT:  # %bb.9:
 ; X86-X87-NEXT:    movl %edx, %ebp
-; X86-X87-NEXT:    movl %ebx, %edi
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB39_10:
 ; X86-X87-NEXT:    fucomp %st(0)
 ; X86-X87-NEXT:    fnstsw %ax
@@ -4419,8 +4425,8 @@ define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jp .LBB39_12
 ; X86-X87-NEXT:  # %bb.11:
-; X86-X87-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-X87-NEXT:    movl %edi, %eax
+; X86-X87-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl %esi, %eax
 ; X86-X87-NEXT:    movl %ebp, %edx
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X86-X87-NEXT:  .LBB39_12:
@@ -4438,7 +4444,9 @@ define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    retl $4
 ; X86-X87-NEXT:  .LBB39_1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl $0, %edx
 ; X86-X87-NEXT:    jb .LBB39_4
 ; X86-X87-NEXT:  .LBB39_3:
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
index a074c78d512f5..d074eaf7323d4 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
@@ -676,7 +676,7 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, %ebx
 ; X86-X87-NEXT:    calll __fixunssfti
 ; X86-X87-NEXT:    subl $4, %esp
-; X86-X87-NEXT:    xorl %edi, %edi
+; X86-X87-NEXT:    xorl %esi, %esi
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $0, %eax
@@ -684,16 +684,16 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind {
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-X87-NEXT:  .LBB8_2:
-; X86-X87-NEXT:    movl $0, %esi
+; X86-X87-NEXT:    movl $0, %edi
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jb .LBB8_4
 ; X86-X87-NEXT:  # %bb.3:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-X87-NEXT:  .LBB8_4:
 ; X86-X87-NEXT:    jb .LBB8_6
 ; X86-X87-NEXT:  # %bb.5:
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-X87-NEXT:  .LBB8_6:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -706,20 +706,20 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind {
 ; X86-X87-NEXT:    movl $15, %eax
 ; X86-X87-NEXT:    ja .LBB8_8
 ; X86-X87-NEXT:  # %bb.7:
-; X86-X87-NEXT:    movl %edi, %eax
+; X86-X87-NEXT:    movl %esi, %eax
 ; X86-X87-NEXT:  .LBB8_8:
-; X86-X87-NEXT:    movl $-1, %edi
+; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    movl $-1, %ebp
 ; X86-X87-NEXT:    movl $-1, %edx
 ; X86-X87-NEXT:    ja .LBB8_10
 ; X86-X87-NEXT:  # %bb.9:
-; X86-X87-NEXT:    movl %ebx, %edi
-; X86-X87-NEXT:    movl %esi, %ebp
+; X86-X87-NEXT:    movl %ebx, %esi
+; X86-X87-NEXT:    movl %edi, %ebp
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-X87-NEXT:  .LBB8_10:
 ; X86-X87-NEXT:    movl %edx, 8(%ecx)
 ; X86-X87-NEXT:    movl %ebp, 4(%ecx)
-; X86-X87-NEXT:    movl %edi, (%ecx)
+; X86-X87-NEXT:    movl %esi, (%ecx)
 ; X86-X87-NEXT:    andl $15, %eax
 ; X86-X87-NEXT:    movb %al, 12(%ecx)
 ; X86-X87-NEXT:    movl %ecx, %eax
@@ -820,7 +820,7 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, %ebx
 ; X86-X87-NEXT:    calll __fixunssfti
 ; X86-X87-NEXT:    subl $4, %esp
-; X86-X87-NEXT:    xorl %edx, %edx
+; X86-X87-NEXT:    xorl %edi, %edi
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $0, %eax
@@ -828,19 +828,19 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-X87-NEXT:  .LBB9_2:
-; X86-X87-NEXT:    movl $0, %ecx
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jb .LBB9_4
 ; X86-X87-NEXT:  # %bb.3:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-X87-NEXT:  .LBB9_4:
 ; X86-X87-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:  .LBB9_4:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jb .LBB9_6
 ; X86-X87-NEXT:  # %bb.5:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-X87-NEXT:  .LBB9_6:
 ; X86-X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
@@ -850,17 +850,17 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, %ebp
-; X86-X87-NEXT:    movl $-1, %edi
+; X86-X87-NEXT:    movl $-1, %edx
 ; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    ja .LBB9_8
 ; X86-X87-NEXT:  # %bb.7:
 ; X86-X87-NEXT:    movl %ebx, %eax
-; X86-X87-NEXT:    movl %edx, %ebp
-; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-X87-NEXT:    movl %edi, %ebp
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB9_8:
 ; X86-X87-NEXT:    movl %esi, 12(%ecx)
-; X86-X87-NEXT:    movl %edi, 8(%ecx)
+; X86-X87-NEXT:    movl %edx, 8(%ecx)
 ; X86-X87-NEXT:    movl %ebp, 4(%ecx)
 ; X86-X87-NEXT:    movl %eax, (%ecx)
 ; X86-X87-NEXT:    movl %ecx, %eax
@@ -1588,7 +1588,7 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, %ebx
 ; X86-X87-NEXT:    calll __fixunsdfti
 ; X86-X87-NEXT:    subl $4, %esp
-; X86-X87-NEXT:    xorl %edi, %edi
+; X86-X87-NEXT:    xorl %esi, %esi
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $0, %eax
@@ -1596,16 +1596,16 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind {
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-X87-NEXT:  .LBB18_2:
-; X86-X87-NEXT:    movl $0, %esi
+; X86-X87-NEXT:    movl $0, %edi
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jb .LBB18_4
 ; X86-X87-NEXT:  # %bb.3:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-X87-NEXT:  .LBB18_4:
 ; X86-X87-NEXT:    jb .LBB18_6
 ; X86-X87-NEXT:  # %bb.5:
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-X87-NEXT:  .LBB18_6:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1618,20 +1618,20 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind {
 ; X86-X87-NEXT:    movl $15, %eax
 ; X86-X87-NEXT:    ja .LBB18_8
 ; X86-X87-NEXT:  # %bb.7:
-; X86-X87-NEXT:    movl %edi, %eax
+; X86-X87-NEXT:    movl %esi, %eax
 ; X86-X87-NEXT:  .LBB18_8:
-; X86-X87-NEXT:    movl $-1, %edi
+; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    movl $-1, %ebp
 ; X86-X87-NEXT:    movl $-1, %edx
 ; X86-X87-NEXT:    ja .LBB18_10
 ; X86-X87-NEXT:  # %bb.9:
-; X86-X87-NEXT:    movl %ebx, %edi
-; X86-X87-NEXT:    movl %esi, %ebp
+; X86-X87-NEXT:    movl %ebx, %esi
+; X86-X87-NEXT:    movl %edi, %ebp
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-X87-NEXT:  .LBB18_10:
 ; X86-X87-NEXT:    movl %edx, 8(%ecx)
 ; X86-X87-NEXT:    movl %ebp, 4(%ecx)
-; X86-X87-NEXT:    movl %edi, (%ecx)
+; X86-X87-NEXT:    movl %esi, (%ecx)
 ; X86-X87-NEXT:    andl $15, %eax
 ; X86-X87-NEXT:    movb %al, 12(%ecx)
 ; X86-X87-NEXT:    movl %ecx, %eax
@@ -1732,7 +1732,7 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, %ebx
 ; X86-X87-NEXT:    calll __fixunsdfti
 ; X86-X87-NEXT:    subl $4, %esp
-; X86-X87-NEXT:    xorl %edx, %edx
+; X86-X87-NEXT:    xorl %edi, %edi
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $0, %eax
@@ -1740,19 +1740,19 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind {
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-X87-NEXT:  .LBB19_2:
-; X86-X87-NEXT:    movl $0, %ecx
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jb .LBB19_4
 ; X86-X87-NEXT:  # %bb.3:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-X87-NEXT:  .LBB19_4:
 ; X86-X87-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:  .LBB19_4:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jb .LBB19_6
 ; X86-X87-NEXT:  # %bb.5:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-X87-NEXT:  .LBB19_6:
 ; X86-X87-NEXT:    fldl {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
@@ -1762,17 +1762,17 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, %ebp
-; X86-X87-NEXT:    movl $-1, %edi
+; X86-X87-NEXT:    movl $-1, %edx
 ; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    ja .LBB19_8
 ; X86-X87-NEXT:  # %bb.7:
 ; X86-X87-NEXT:    movl %ebx, %eax
-; X86-X87-NEXT:    movl %edx, %ebp
-; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-X87-NEXT:    movl %edi, %ebp
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB19_8:
 ; X86-X87-NEXT:    movl %esi, 12(%ecx)
-; X86-X87-NEXT:    movl %edi, 8(%ecx)
+; X86-X87-NEXT:    movl %edx, 8(%ecx)
 ; X86-X87-NEXT:    movl %ebp, 4(%ecx)
 ; X86-X87-NEXT:    movl %eax, (%ecx)
 ; X86-X87-NEXT:    movl %ecx, %eax
@@ -2627,7 +2627,7 @@ define i100 @test_unsigned_i100_f16(half %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, %ebx
 ; X86-X87-NEXT:    calll __fixunssfti
 ; X86-X87-NEXT:    subl $4, %esp
-; X86-X87-NEXT:    xorl %edi, %edi
+; X86-X87-NEXT:    xorl %esi, %esi
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $0, %eax
@@ -2635,16 +2635,16 @@ define i100 @test_unsigned_i100_f16(half %f) nounwind {
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-X87-NEXT:  .LBB28_2:
-; X86-X87-NEXT:    movl $0, %esi
+; X86-X87-NEXT:    movl $0, %edi
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jb .LBB28_4
 ; X86-X87-NEXT:  # %bb.3:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-X87-NEXT:  .LBB28_4:
 ; X86-X87-NEXT:    jb .LBB28_6
 ; X86-X87-NEXT:  # %bb.5:
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-X87-NEXT:  .LBB28_6:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2657,20 +2657,20 @@ define i100 @test_unsigned_i100_f16(half %f) nounwind {
 ; X86-X87-NEXT:    movl $15, %eax
 ; X86-X87-NEXT:    ja .LBB28_8
 ; X86-X87-NEXT:  # %bb.7:
-; X86-X87-NEXT:    movl %edi, %eax
+; X86-X87-NEXT:    movl %esi, %eax
 ; X86-X87-NEXT:  .LBB28_8:
-; X86-X87-NEXT:    movl $-1, %edi
+; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    movl $-1, %ebp
 ; X86-X87-NEXT:    movl $-1, %edx
 ; X86-X87-NEXT:    ja .LBB28_10
 ; X86-X87-NEXT:  # %bb.9:
-; X86-X87-NEXT:    movl %ebx, %edi
-; X86-X87-NEXT:    movl %esi, %ebp
+; X86-X87-NEXT:    movl %ebx, %esi
+; X86-X87-NEXT:    movl %edi, %ebp
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-X87-NEXT:  .LBB28_10:
 ; X86-X87-NEXT:    movl %edx, 8(%ecx)
 ; X86-X87-NEXT:    movl %ebp, 4(%ecx)
-; X86-X87-NEXT:    movl %edi, (%ecx)
+; X86-X87-NEXT:    movl %esi, (%ecx)
 ; X86-X87-NEXT:    andl $15, %eax
 ; X86-X87-NEXT:    movb %al, 12(%ecx)
 ; X86-X87-NEXT:    movl %ecx, %eax
@@ -2781,7 +2781,7 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, %ebx
 ; X86-X87-NEXT:    calll __fixunssfti
 ; X86-X87-NEXT:    subl $4, %esp
-; X86-X87-NEXT:    xorl %edx, %edx
+; X86-X87-NEXT:    xorl %edi, %edi
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $0, %eax
@@ -2789,19 +2789,19 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-X87-NEXT:  .LBB29_2:
-; X86-X87-NEXT:    movl $0, %ecx
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jb .LBB29_4
 ; X86-X87-NEXT:  # %bb.3:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-X87-NEXT:  .LBB29_4:
 ; X86-X87-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:  .LBB29_4:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jb .LBB29_6
 ; X86-X87-NEXT:  # %bb.5:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-X87-NEXT:  .LBB29_6:
 ; X86-X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
@@ -2811,17 +2811,17 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, %ebp
-; X86-X87-NEXT:    movl $-1, %edi
+; X86-X87-NEXT:    movl $-1, %edx
 ; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    ja .LBB29_8
 ; X86-X87-NEXT:  # %bb.7:
 ; X86-X87-NEXT:    movl %ebx, %eax
-; X86-X87-NEXT:    movl %edx, %ebp
-; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-X87-NEXT:    movl %edi, %ebp
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB29_8:
 ; X86-X87-NEXT:    movl %esi, 12(%ecx)
-; X86-X87-NEXT:    movl %edi, 8(%ecx)
+; X86-X87-NEXT:    movl %edx, 8(%ecx)
 ; X86-X87-NEXT:    movl %ebp, 4(%ecx)
 ; X86-X87-NEXT:    movl %eax, (%ecx)
 ; X86-X87-NEXT:    movl %ecx, %eax
@@ -3842,7 +3842,7 @@ define i100 @test_unsigned_i100_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, %ebx
 ; X86-X87-NEXT:    calll __fixunsxfti
 ; X86-X87-NEXT:    subl $4, %esp
-; X86-X87-NEXT:    xorl %edi, %edi
+; X86-X87-NEXT:    xorl %esi, %esi
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $0, %eax
@@ -3850,16 +3850,16 @@ define i100 @test_unsigned_i100_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-X87-NEXT:  .LBB38_2:
-; X86-X87-NEXT:    movl $0, %esi
+; X86-X87-NEXT:    movl $0, %edi
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jb .LBB38_4
 ; X86-X87-NEXT:  # %bb.3:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-X87-NEXT:  .LBB38_4:
 ; X86-X87-NEXT:    jb .LBB38_6
 ; X86-X87-NEXT:  # %bb.5:
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-X87-NEXT:  .LBB38_6:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -3872,20 +3872,20 @@ define i100 @test_unsigned_i100_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    movl $15, %eax
 ; X86-X87-NEXT:    ja .LBB38_8
 ; X86-X87-NEXT:  # %bb.7:
-; X86-X87-NEXT:    movl %edi, %eax
+; X86-X87-NEXT:    movl %esi, %eax
 ; X86-X87-NEXT:  .LBB38_8:
-; X86-X87-NEXT:    movl $-1, %edi
+; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    movl $-1, %ebp
 ; X86-X87-NEXT:    movl $-1, %edx
 ; X86-X87-NEXT:    ja .LBB38_10
 ; X86-X87-NEXT:  # %bb.9:
-; X86-X87-NEXT:    movl %ebx, %edi
-; X86-X87-NEXT:    movl %esi, %ebp
+; X86-X87-NEXT:    movl %ebx, %esi
+; X86-X87-NEXT:    movl %edi, %ebp
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-X87-NEXT:  .LBB38_10:
 ; X86-X87-NEXT:    movl %edx, 8(%ecx)
 ; X86-X87-NEXT:    movl %ebp, 4(%ecx)
-; X86-X87-NEXT:    movl %edi, (%ecx)
+; X86-X87-NEXT:    movl %esi, (%ecx)
 ; X86-X87-NEXT:    andl $15, %eax
 ; X86-X87-NEXT:    movb %al, 12(%ecx)
 ; X86-X87-NEXT:    movl %ecx, %eax
@@ -4000,7 +4000,7 @@ define i128 @test_unsigned_i128_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    movl %eax, %ebx
 ; X86-X87-NEXT:    calll __fixunsxfti
 ; X86-X87-NEXT:    subl $4, %esp
-; X86-X87-NEXT:    xorl %edx, %edx
+; X86-X87-NEXT:    xorl %edi, %edi
 ; X86-X87-NEXT:    movb %bh, %ah
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $0, %eax
@@ -4008,19 +4008,19 @@ define i128 @test_unsigned_i128_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-X87-NEXT:  .LBB39_2:
-; X86-X87-NEXT:    movl $0, %ecx
+; X86-X87-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-X87-NEXT:    jb .LBB39_4
 ; X86-X87-NEXT:  # %bb.3:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-X87-NEXT:  .LBB39_4:
 ; X86-X87-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT:  .LBB39_4:
 ; X86-X87-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-X87-NEXT:    movl $0, %ebx
 ; X86-X87-NEXT:    jb .LBB39_6
 ; X86-X87-NEXT:  # %bb.5:
 ; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-X87-NEXT:  .LBB39_6:
 ; X86-X87-NEXT:    fldt {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
@@ -4030,17 +4030,17 @@ define i128 @test_unsigned_i128_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    movl $-1, %eax
 ; X86-X87-NEXT:    movl $-1, %ebp
-; X86-X87-NEXT:    movl $-1, %edi
+; X86-X87-NEXT:    movl $-1, %edx
 ; X86-X87-NEXT:    movl $-1, %esi
 ; X86-X87-NEXT:    ja .LBB39_8
 ; X86-X87-NEXT:  # %bb.7:
 ; X86-X87-NEXT:    movl %ebx, %eax
-; X86-X87-NEXT:    movl %edx, %ebp
-; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-X87-NEXT:    movl %edi, %ebp
+; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-X87-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X86-X87-NEXT:  .LBB39_8:
 ; X86-X87-NEXT:    movl %esi, 12(%ecx)
-; X86-X87-NEXT:    movl %edi, 8(%ecx)
+; X86-X87-NEXT:    movl %edx, 8(%ecx)
 ; X86-X87-NEXT:    movl %ebp, 4(%ecx)
 ; X86-X87-NEXT:    movl %eax, (%ecx)
 ; X86-X87-NEXT:    movl %ecx, %eax
diff --git a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh.ll b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh.ll
index 8403d4b754d80..d936c7b335486 100644
--- a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh.ll
+++ b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh.ll
@@ -49,7 +49,6 @@
 
 ; SJLJ:       LBB0_3:
 ; SJLJ-NEXT:          endbr32
-; SJLJ-NEXT:          leal
 ; SJLJ-NEXT:          movl
 ; SJLJ-NEXT:          cmpl
 ; SJLJ-NEXT:          jb      LBB0_4
diff --git a/llvm/test/CodeGen/X86/no-split-size.ll b/llvm/test/CodeGen/X86/no-split-size.ll
index c1f93acd77dee..d2ae2ffffa819 100644
--- a/llvm/test/CodeGen/X86/no-split-size.ll
+++ b/llvm/test/CodeGen/X86/no-split-size.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -regalloc-csr-cost-scale=0 | FileCheck %s
 
 ; @foo is optimized for size. Variables %p2, %p3, %p4, %p5 and %p6 are not split
 ; in cold blocks.
diff --git a/llvm/test/CodeGen/X86/ragreedy-bug.ll b/llvm/test/CodeGen/X86/ragreedy-bug.ll
index 0d9332ef9cdd1..67e080eaf1d9a 100644
--- a/llvm/test/CodeGen/X86/ragreedy-bug.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy --regalloc-csr-cost-scale=0 | FileCheck %s
 
 ; This testing case is reduced from 197.parser prune_match function.
 ; We make sure register copies are not generated on isupper.exit blocks.
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index 69a6cdb7081eb..60ebb4bd41ebb 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -78,9 +78,10 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_8: ## %while.body.preheader
 ; CHECK-NEXT:    imulq $1040, %rdx, %rax ## imm = 0x410
 ; CHECK-NEXT:    movq _syBuf@GOTPCREL(%rip), %rcx
-; CHECK-NEXT:    leaq 8(%rcx,%rax), %rdx
+; CHECK-NEXT:    leaq 8(%rcx,%rax), %rax
+; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    movq _syCTRO@GOTPCREL(%rip), %rax
-; CHECK-NEXT:    movl $1, %r13d
+; CHECK-NEXT:    movl $1, %ebp
 ; CHECK-NEXT:    movb $1, %cl
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  LBB0_9: ## %do.body
@@ -89,9 +90,8 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    jne LBB0_9
 ; CHECK-NEXT:  ## %bb.10: ## %do.end
-; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:    xorl %ebx, %ebx
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne LBB0_11
 ; CHECK-NEXT:  ## %bb.12: ## %while.body200.preheader
@@ -100,7 +100,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    leaq LJTI0_1(%rip), %r14
 ; CHECK-NEXT:    movb $1, %sil
 ; CHECK-NEXT:    movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
-; CHECK-NEXT:    xorl %r15d, %r15d
+; CHECK-NEXT:    xorl %r13d, %r13d
 ; CHECK-NEXT:    jmp LBB0_13
 ; CHECK-NEXT:  LBB0_43: ## %while.cond1037.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -109,15 +109,15 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  LBB0_20: ## %while.cond197.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    decl %r13d
-; CHECK-NEXT:    testl %r13d, %r13d
-; CHECK-NEXT:    movl %ebp, %r15d
+; CHECK-NEXT:    decl %ebp
+; CHECK-NEXT:    testl %ebp, %ebp
+; CHECK-NEXT:    movl %ebx, %r13d
 ; CHECK-NEXT:    jle LBB0_21
 ; CHECK-NEXT:  LBB0_13: ## %while.body200
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
 ; CHECK-NEXT:    ## Child Loop BB0_28 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_37 Depth 2
-; CHECK-NEXT:    leal -268(%rbp), %eax
+; CHECK-NEXT:    leal -268(%rbx), %eax
 ; CHECK-NEXT:    cmpl $105, %eax
 ; CHECK-NEXT:    ja LBB0_14
 ; CHECK-NEXT:  ## %bb.55: ## %while.body200
@@ -128,12 +128,12 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_25: ## %sw.bb474
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %r12b, %r12b
-; CHECK-NEXT:    ## implicit-def: $rbx
+; CHECK-NEXT:    ## implicit-def: $r15
 ; CHECK-NEXT:    jne LBB0_33
 ; CHECK-NEXT:  ## %bb.26: ## %do.body479.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %r12b, %r12b
-; CHECK-NEXT:    ## implicit-def: $rbx
+; CHECK-NEXT:    ## implicit-def: $r15
 ; CHECK-NEXT:    jne LBB0_33
 ; CHECK-NEXT:  ## %bb.27: ## %land.rhs485.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -142,7 +142,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  LBB0_31: ## %do.body479.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_28 Depth=2
-; CHECK-NEXT:    leaq 1(%rbx), %rax
+; CHECK-NEXT:    leaq 1(%r15), %rax
 ; CHECK-NEXT:    testb %r12b, %r12b
 ; CHECK-NEXT:    je LBB0_32
 ; CHECK-NEXT:  LBB0_28: ## %land.rhs485
@@ -152,7 +152,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jne LBB0_54
 ; CHECK-NEXT:  ## %bb.29: ## %cond.true.i.i2780
 ; CHECK-NEXT:    ## in Loop: Header=BB0_28 Depth=2
-; CHECK-NEXT:    movq %rax, %rbx
+; CHECK-NEXT:    movq %rax, %r15
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne LBB0_31
 ; CHECK-NEXT:  ## %bb.30: ## %lor.rhs500
@@ -167,7 +167,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  LBB0_14: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal 1(%rbp), %eax
+; CHECK-NEXT:    leal 1(%rbx), %eax
 ; CHECK-NEXT:    cmpl $21, %eax
 ; CHECK-NEXT:    ja LBB0_20
 ; CHECK-NEXT:  ## %bb.15: ## %while.body200
@@ -177,7 +177,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_18: ## %while.cond201.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $1, %ebp
+; CHECK-NEXT:    movl $1, %ebx
 ; CHECK-NEXT:    jmp LBB0_20
 ; CHECK-NEXT:  LBB0_44: ## %sw.bb1134
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -187,22 +187,22 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_54
 ; CHECK-NEXT:  ## %bb.45: ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
-; CHECK-NEXT:    movl $268, %ebp ## imm = 0x10C
+; CHECK-NEXT:    movl $268, %ebx ## imm = 0x10C
 ; CHECK-NEXT:    jmp LBB0_20
 ; CHECK-NEXT:  LBB0_39: ## %sw.bb566
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $20, %ebp
+; CHECK-NEXT:    movl $20, %ebx
 ; CHECK-NEXT:    jmp LBB0_20
 ; CHECK-NEXT:  LBB0_19: ## %sw.bb243
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $2, %ebp
+; CHECK-NEXT:    movl $2, %ebx
 ; CHECK-NEXT:    jmp LBB0_20
 ; CHECK-NEXT:  LBB0_32: ## %if.end517.loopexitsplit
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    incq %rbx
+; CHECK-NEXT:    incq %r15
 ; CHECK-NEXT:  LBB0_33: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal -324(%r15), %eax
+; CHECK-NEXT:    leal -324(%r13), %eax
 ; CHECK-NEXT:    cmpl $59, %eax
 ; CHECK-NEXT:    ja LBB0_34
 ; CHECK-NEXT:  ## %bb.56: ## %if.end517
@@ -212,11 +212,11 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_37
 ; CHECK-NEXT:  LBB0_34: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $11, %r15d
+; CHECK-NEXT:    cmpl $11, %r13d
 ; CHECK-NEXT:    je LBB0_37
 ; CHECK-NEXT:  ## %bb.35: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $24, %r15d
+; CHECK-NEXT:    cmpl $24, %r13d
 ; CHECK-NEXT:    je LBB0_37
 ; CHECK-NEXT:  ## %bb.36: ## %if.then532
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -231,7 +231,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  ## %bb.38: ## %for.cond542.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    movb $0, (%rbx)
+; CHECK-NEXT:    movb $0, (%r15)
 ; CHECK-NEXT:    leaq LJTI0_0(%rip), %rdx
 ; CHECK-NEXT:    jmp LBB0_20
 ; CHECK-NEXT:    .p2align 4
@@ -249,12 +249,12 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_11:
 ; CHECK-NEXT:    movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
 ; CHECK-NEXT:  LBB0_21: ## %while.end1465
-; CHECK-NEXT:    incl %ebp
-; CHECK-NEXT:    cmpl $16, %ebp
+; CHECK-NEXT:    incl %ebx
+; CHECK-NEXT:    cmpl $16, %ebx
 ; CHECK-NEXT:    ja LBB0_49
 ; CHECK-NEXT:  ## %bb.22: ## %while.end1465
 ; CHECK-NEXT:    movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT:    btl %ebp, %eax
+; CHECK-NEXT:    btl %ebx, %eax
 ; CHECK-NEXT:    jae LBB0_49
 ; CHECK-NEXT:  ## %bb.23:
 ; CHECK-NEXT:    xorl %ebx, %ebx
@@ -294,19 +294,19 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  ## %bb.50: ## %for.body1664.lr.ph
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
 ; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp ## 4-byte Reload
 ; CHECK-NEXT:    jne LBB0_53
 ; CHECK-NEXT:  ## %bb.51: ## %while.body1679.preheader
 ; CHECK-NEXT:    incl %ebp
-; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    xorl %r14d, %r14d
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  LBB0_52: ## %while.body1679
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movq (%r14), %rdi
+; CHECK-NEXT:    movq (%rbx), %rdi
 ; CHECK-NEXT:    callq _fileno
 ; CHECK-NEXT:    incl %ebp
-; CHECK-NEXT:    testb %bl, %bl
+; CHECK-NEXT:    testb %r14b, %r14b
 ; CHECK-NEXT:    jne LBB0_52
 ; CHECK-NEXT:  LBB0_53: ## %while.cond1683.preheader
 ; CHECK-NEXT:    testb %al, %al
diff --git a/llvm/test/CodeGen/X86/sjlj-eh.ll b/llvm/test/CodeGen/X86/sjlj-eh.ll
index d2dcb35a4908e..2c9aa6fd7d55d 100644
--- a/llvm/test/CodeGen/X86/sjlj-eh.ll
+++ b/llvm/test/CodeGen/X86/sjlj-eh.ll
@@ -1,5 +1,5 @@
 ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39439.
-; RUN: llc -mtriple i386-windows-gnu -exception-model sjlj -filetype asm -o - %s -verify-machineinstrs=0 | FileCheck %s
+; RUN: llc -mtriple i386-windows-gnu -regalloc-csr-cost-scale=0 -exception-model sjlj -filetype asm -o - %s -verify-machineinstrs=0 | FileCheck %s
 ; RUN: llc -mtriple x86_64-windows-gnu -exception-model sjlj -filetype asm -o - %s -verify-machineinstrs=0 | FileCheck %s -check-prefix CHECK-X64
 ; RUN: llc -mtriple x86_64-linux -exception-model sjlj -filetype asm -o - %s -verify-machineinstrs=0 | FileCheck %s -check-prefix CHECK-X64-LINUX
 
diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening.ll b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
index 5fd1f77e166d4..5f107818e9fa1 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
@@ -32,87 +32,85 @@ entry:
 define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2, ptr %ptr3) speculative_load_hardening {
 ; X64-LABEL: test_basic_conditions:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %r15
-; X64-NEXT:    .cfi_def_cfa_offset 16
 ; X64-NEXT:    pushq %r14
-; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    .cfi_def_cfa_offset 16
 ; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    .cfi_def_cfa_offset 32
-; X64-NEXT:    .cfi_offset %rbx, -32
-; X64-NEXT:    .cfi_offset %r14, -24
-; X64-NEXT:    .cfi_offset %r15, -16
+; X64-NEXT:    .cfi_offset %rbx, -24
+; X64-NEXT:    .cfi_offset %r14, -16
 ; X64-NEXT:    movq %rsp, %rax
-; X64-NEXT:    movq $-1, %rbx
+; X64-NEXT:    movq $-1, %r10
 ; X64-NEXT:    sarq $63, %rax
 ; X64-NEXT:    testl %edi, %edi
 ; X64-NEXT:    jne .LBB1_1
 ; X64-NEXT:  # %bb.2: # %then1
-; X64-NEXT:    cmovneq %rbx, %rax
+; X64-NEXT:    cmovneq %r10, %rax
 ; X64-NEXT:    testl %esi, %esi
 ; X64-NEXT:    je .LBB1_4
 ; X64-NEXT:  .LBB1_1:
-; X64-NEXT:    cmoveq %rbx, %rax
+; X64-NEXT:    cmoveq %r10, %rax
 ; X64-NEXT:  .LBB1_8: # %exit
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    orq %rax, %rsp
-; X64-NEXT:    popq %rbx
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 24
-; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %rbx
 ; X64-NEXT:    .cfi_def_cfa_offset 16
-; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %r14
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB1_4: # %then2
 ; X64-NEXT:    .cfi_def_cfa_offset 32
-; X64-NEXT:    cmovneq %rbx, %rax
+; X64-NEXT:    cmovneq %r10, %rax
 ; X64-NEXT:    testl %edx, %edx
 ; X64-NEXT:    je .LBB1_6
 ; X64-NEXT:  # %bb.5: # %else3
-; X64-NEXT:    cmoveq %rbx, %rax
-; X64-NEXT:    movslq (%r9), %rcx
-; X64-NEXT:    orq %rax, %rcx
-; X64-NEXT:    leaq (%r8,%rcx,4), %r14
-; X64-NEXT:    movl %ecx, (%r8,%rcx,4)
+; X64-NEXT:    cmoveq %r10, %rax
+; X64-NEXT:    movslq (%r9), %rdx
+; X64-NEXT:    orq %rax, %rdx
+; X64-NEXT:    leaq (%r8,%rdx,4), %rcx
+; X64-NEXT:    movl %edx, (%r8,%rdx,4)
 ; X64-NEXT:    jmp .LBB1_7
 ; X64-NEXT:  .LBB1_6: # %then3
-; X64-NEXT:    cmovneq %rbx, %rax
+; X64-NEXT:    cmovneq %r10, %rax
 ; X64-NEXT:    movl (%rcx), %ecx
 ; X64-NEXT:    addl (%r8), %ecx
 ; X64-NEXT:    movslq %ecx, %rdi
 ; X64-NEXT:    orq %rax, %rdi
 ; X64-NEXT:    movl (%r8,%rdi,4), %esi
 ; X64-NEXT:    orl %eax, %esi
-; X64-NEXT:    movq (%r9), %r14
-; X64-NEXT:    orq %rax, %r14
-; X64-NEXT:    addl (%r14), %esi
+; X64-NEXT:    movq (%r9), %rbx
+; X64-NEXT:    orq %rax, %rbx
+; X64-NEXT:    addl (%rbx), %esi
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    # kill: def $edi killed $edi killed $rdi
 ; X64-NEXT:    orq %rax, %rsp
-; X64-NEXT:    movq %r8, %r15
+; X64-NEXT:    movq %r8, (%rsp) # 8-byte Spill
+; X64-NEXT:    movq $-1, %r14
 ; X64-NEXT:    callq leak@PLT
 ; X64-NEXT:  .Lslh_ret_addr0:
-; X64-NEXT:    movq %r15, %r8
+; X64-NEXT:    movq %rbx, %rcx
+; X64-NEXT:    movq (%rsp), %r8 # 8-byte Reload
 ; X64-NEXT:    movq %rsp, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
 ; X64-NEXT:    sarq $63, %rax
-; X64-NEXT:    cmpq $.Lslh_ret_addr0, %rcx
-; X64-NEXT:    cmovneq %rbx, %rax
+; X64-NEXT:    cmpq $.Lslh_ret_addr0, %rdx
+; X64-NEXT:    cmovneq %r14, %rax
 ; X64-NEXT:  .LBB1_7: # %merge
-; X64-NEXT:    movslq (%r14), %rcx
+; X64-NEXT:    movslq (%rcx), %rcx
 ; X64-NEXT:    orq %rax, %rcx
 ; X64-NEXT:    movl $0, (%r8,%rcx,4)
 ; X64-NEXT:    jmp .LBB1_8
 ;
 ; X64-LFENCE-LABEL: test_basic_conditions:
 ; X64-LFENCE:       # %bb.0: # %entry
-; X64-LFENCE-NEXT:    pushq %r14
-; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
 ; X64-LFENCE-NEXT:    pushq %rbx
-; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
-; X64-LFENCE-NEXT:    pushq %rax
+; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
+; X64-LFENCE-NEXT:    subq $16, %rsp
 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
-; X64-LFENCE-NEXT:    .cfi_offset %rbx, -24
-; X64-LFENCE-NEXT:    .cfi_offset %r14, -16
+; X64-LFENCE-NEXT:    .cfi_offset %rbx, -16
 ; X64-LFENCE-NEXT:    testl %edi, %edi
 ; X64-LFENCE-NEXT:    jne .LBB1_6
 ; X64-LFENCE-NEXT:  # %bb.1: # %then1
@@ -125,9 +123,9 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2,
 ; X64-LFENCE-NEXT:    je .LBB1_3
 ; X64-LFENCE-NEXT:  # %bb.4: # %else3
 ; X64-LFENCE-NEXT:    lfence
-; X64-LFENCE-NEXT:    movslq (%r9), %rax
-; X64-LFENCE-NEXT:    leaq (%r8,%rax,4), %rbx
-; X64-LFENCE-NEXT:    movl %eax, (%r8,%rax,4)
+; X64-LFENCE-NEXT:    movslq (%r9), %rcx
+; X64-LFENCE-NEXT:    leaq (%r8,%rcx,4), %rax
+; X64-LFENCE-NEXT:    movl %ecx, (%r8,%rcx,4)
 ; X64-LFENCE-NEXT:    jmp .LBB1_5
 ; X64-LFENCE-NEXT:  .LBB1_3: # %then3
 ; X64-LFENCE-NEXT:    lfence
@@ -138,19 +136,18 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2,
 ; X64-LFENCE-NEXT:    movq (%r9), %rbx
 ; X64-LFENCE-NEXT:    addl (%rbx), %esi
 ; X64-LFENCE-NEXT:    # kill: def $edi killed $edi killed $rdi
-; X64-LFENCE-NEXT:    movq %r8, %r14
+; X64-LFENCE-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-LFENCE-NEXT:    callq leak@PLT
-; X64-LFENCE-NEXT:    movq %r14, %r8
+; X64-LFENCE-NEXT:    movq %rbx, %rax
+; X64-LFENCE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
 ; X64-LFENCE-NEXT:  .LBB1_5: # %merge
-; X64-LFENCE-NEXT:    movslq (%rbx), %rax
+; X64-LFENCE-NEXT:    movslq (%rax), %rax
 ; X64-LFENCE-NEXT:    movl $0, (%r8,%rax,4)
 ; X64-LFENCE-NEXT:  .LBB1_6: # %exit
 ; X64-LFENCE-NEXT:    lfence
-; X64-LFENCE-NEXT:    addq $8, %rsp
-; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
-; X64-LFENCE-NEXT:    popq %rbx
+; X64-LFENCE-NEXT:    addq $16, %rsp
 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
-; X64-LFENCE-NEXT:    popq %r14
+; X64-LFENCE-NEXT:    popq %rbx
 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 8
 ; X64-LFENCE-NEXT:    retq
 entry:
@@ -507,17 +504,11 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    pushq %rbp
 ; X64-NEXT:    .cfi_def_cfa_offset 16
-; X64-NEXT:    pushq %r15
-; X64-NEXT:    .cfi_def_cfa_offset 24
-; X64-NEXT:    pushq %r14
-; X64-NEXT:    .cfi_def_cfa_offset 32
 ; X64-NEXT:    pushq %rbx
-; X64-NEXT:    .cfi_def_cfa_offset 40
-; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 48
-; X64-NEXT:    .cfi_offset %rbx, -40
-; X64-NEXT:    .cfi_offset %r14, -32
-; X64-NEXT:    .cfi_offset %r15, -24
+; X64-NEXT:    .cfi_offset %rbx, -24
 ; X64-NEXT:    .cfi_offset %rbp, -16
 ; X64-NEXT:    movq %rsp, %rax
 ; X64-NEXT:    movq $-1, %rbx
@@ -525,10 +516,10 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64-NEXT:    cmpl $41, %edi
 ; X64-NEXT:    jg .LBB4_1
 ; X64-NEXT:  # %bb.2: # %thrower
-; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    cmovgq %rbx, %rax
 ; X64-NEXT:    movslq %edi, %rcx
-; X64-NEXT:    movq %rsi, %r15
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movl (%rsi,%rcx,4), %ebp
 ; X64-NEXT:    orl %eax, %ebp
 ; X64-NEXT:    movl $4, %edi
@@ -542,7 +533,7 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64-NEXT:    cmpq $.Lslh_ret_addr4, %rdx
 ; X64-NEXT:    cmovneq %rbx, %rcx
 ; X64-NEXT:    movl %ebp, (%rax)
-; X64-NEXT:  .Ltmp0:
+; X64-NEXT:  .Ltmp0: # EH_LABEL
 ; X64-NEXT:    shlq $47, %rcx
 ; X64-NEXT:    movq %rax, %rdi
 ; X64-NEXT:    xorl %esi, %esi
@@ -555,34 +546,32 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64-NEXT:    sarq $63, %rax
 ; X64-NEXT:    cmpq $.Lslh_ret_addr5, %rcx
 ; X64-NEXT:    cmovneq %rbx, %rax
-; X64-NEXT:  .Ltmp1:
+; X64-NEXT:  .Ltmp1: # EH_LABEL
 ; X64-NEXT:    jmp .LBB4_3
 ; X64-NEXT:  .LBB4_1:
 ; X64-NEXT:    cmovleq %rbx, %rax
 ; X64-NEXT:  .LBB4_3: # %exit
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    orq %rax, %rsp
-; X64-NEXT:    addq $8, %rsp
-; X64-NEXT:    .cfi_def_cfa_offset 40
-; X64-NEXT:    popq %rbx
-; X64-NEXT:    .cfi_def_cfa_offset 32
-; X64-NEXT:    popq %r14
+; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 24
-; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbx
 ; X64-NEXT:    .cfi_def_cfa_offset 16
 ; X64-NEXT:    popq %rbp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB4_4: # %lpad
 ; X64-NEXT:    .cfi_def_cfa_offset 48
-; X64-NEXT:  .Ltmp2:
+; X64-NEXT:  .Ltmp2: # EH_LABEL
 ; X64-NEXT:    movq %rsp, %rcx
 ; X64-NEXT:    sarq $63, %rcx
-; X64-NEXT:    movl (%rax), %eax
-; X64-NEXT:    addl (%r15), %eax
-; X64-NEXT:    cltq
-; X64-NEXT:    orq %rcx, %rax
-; X64-NEXT:    movl (%r14,%rax,4), %edi
+; X64-NEXT:    movl (%rax), %edx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    addl (%rax), %edx
+; X64-NEXT:    movslq %edx, %rdx
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl (%rax,%rdx,4), %edi
 ; X64-NEXT:    orl %ecx, %edi
 ; X64-NEXT:    shlq $47, %rcx
 ; X64-NEXT:    orq %rcx, %rsp
@@ -616,12 +605,12 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64-LFENCE-NEXT:    movl $4, %edi
 ; X64-LFENCE-NEXT:    callq __cxa_allocate_exception@PLT
 ; X64-LFENCE-NEXT:    movl %ebp, (%rax)
-; X64-LFENCE-NEXT:  .Ltmp0:
+; X64-LFENCE-NEXT:  .Ltmp0: # EH_LABEL
 ; X64-LFENCE-NEXT:    movq %rax, %rdi
 ; X64-LFENCE-NEXT:    xorl %esi, %esi
 ; X64-LFENCE-NEXT:    xorl %edx, %edx
 ; X64-LFENCE-NEXT:    callq __cxa_throw@PLT
-; X64-LFENCE-NEXT:  .Ltmp1:
+; X64-LFENCE-NEXT:  .Ltmp1: # EH_LABEL
 ; X64-LFENCE-NEXT:  .LBB4_2: # %exit
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    popq %rbx
@@ -633,7 +622,7 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64-LFENCE-NEXT:    retq
 ; X64-LFENCE-NEXT:  .LBB4_3: # %lpad
 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
-; X64-LFENCE-NEXT:  .Ltmp2:
+; X64-LFENCE-NEXT:  .Ltmp2: # EH_LABEL
 ; X64-LFENCE-NEXT:    movl (%rax), %eax
 ; X64-LFENCE-NEXT:    addl (%r14), %eax
 ; X64-LFENCE-NEXT:    cltq
diff --git a/llvm/test/CodeGen/X86/split-reg-with-hint.ll b/llvm/test/CodeGen/X86/split-reg-with-hint.ll
index 689f83ff0adc4..85d5dbc123873 100644
--- a/llvm/test/CodeGen/X86/split-reg-with-hint.ll
+++ b/llvm/test/CodeGen/X86/split-reg-with-hint.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -regalloc-csr-cost-scale=0 | FileCheck %s
 
 ; %ptr has a hint to %rdi in entry block, it also has a interference with %rdi
 ; in block if.then. It should be split in cold block if.then.
diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
index 2a0710e3249a6..a7114b3d10434 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
@@ -68,7 +68,7 @@ define void @test_mixed(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(
 ; CHECK-PREG:    $rdx = COPY killed renamable $r14
 ; CHECK-PREG:    dead $ecx = MOV32r0 implicit-def dead $eflags, implicit-def $rcx
 ; CHECK-PREG:    $r8 = COPY killed renamable $rbx
-; CHECK-PREG:    CALL64pcrel32 @consume5, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit killed $rcx, implicit killed $r8, implicit-def $rsp, implicit-def $ssp
+; CHECK-PREG:    CALL64pcrel32 @consume5, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $rcx, implicit $r8, implicit-def $rsp, implicit-def $ssp
 
 entry:
   %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %a, ptr addrspace(1) null, ptr addrspace(1) %b, ptr addrspace(1) null, ptr addrspace(1) %c)]
@@ -168,7 +168,7 @@ define void @test_gcrelocate_uniqueing(ptr addrspace(1) %ptr) gc "statepoint-exa
 ; CHECK-PREG:    renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, killed renamable $rbx, 2, 4278124286, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
 ; CHECK-PREG:    $rdi = COPY renamable $rbx
 ; CHECK-PREG:    $rsi = COPY killed renamable $rbx
-; CHECK-PREG:    CALL64pcrel32 @consume2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit killed $rsi, implicit-def $rsp, implicit-def $ssp
+; CHECK-PREG:    CALL64pcrel32 @consume2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp
 
   %tok = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) ["deopt" (ptr addrspace(1) %ptr, i32 undef), "gc-live" (ptr addrspace(1) %ptr, ptr addrspace(1) %ptr)]
   %a = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %tok, i32 0, i32 0)
@@ -194,7 +194,7 @@ define void @test_gcptr_uniqueing(ptr addrspace(1) %ptr) gc "statepoint-example"
 ; CHECK-PREG:    renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, killed renamable $rbx, 2, 4278124286, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
 ; CHECK-PREG:    $rdi = COPY renamable $rbx
 ; CHECK-PREG:    $rsi = COPY killed renamable $rbx
-; CHECK-PREG:    CALL64pcrel32 @use1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit killed $rsi, implicit-def $rsp, implicit-def $ssp
+; CHECK-PREG:    CALL64pcrel32 @use1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp
 
   %tok = tail call token (i64, i32, ptr, i32, i32, ...)
       @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) ["deopt" (ptr addrspace(1) %ptr, i32 undef), "gc-live" (ptr addrspace(1) %ptr, ptr addrspace(1) %ptr)]
diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
index f08b388c3dc56..b83e58350f66d 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
@@ -199,7 +199,7 @@ define void @test_duplicate_ir_values() gc "statepoint-example" personality ptr
   ; CHECK-NEXT:   renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
   ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
   ; CHECK-NEXT:   $edi = MOV32ri 10
-  ; CHECK-NEXT:   dead renamable $rbx = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 2, 2, 2, killed renamable $rbx, renamable $rbx, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
+  ; CHECK-NEXT:   dead renamable $rbx = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, $edi, 2, 0, 2, 2, 2, 2, killed renamable $rbx, renamable $rbx, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
   ; CHECK-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.exceptional_return (landing-pad):
@@ -209,7 +209,7 @@ define void @test_duplicate_ir_values() gc "statepoint-example" personality ptr
   ; CHECK-NEXT:   renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
   ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
   ; CHECK-NEXT:   $edi = MOV32ri -271
-  ; CHECK-NEXT:   dead renamable $rbx = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 0, 2, 1, killed renamable $rbx, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
+  ; CHECK-NEXT:   dead renamable $rbx = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, $edi, 2, 0, 2, 0, 2, 1, killed renamable $rbx, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
   ; CHECK-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
 entry:
   %val1 = load ptr addrspace(1), ptr addrspace(1) undef, align 8
diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
index f3e6d8b5489c3..ad655bf366909 100644
--- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -regalloc-csr-cost-scale=0 | FileCheck %s
 
 ; Function Attrs: nounwind uwtable
 define void @tail_dup_merge_loops(i32 %a, ptr %b, ptr %c) local_unnamed_addr #0 {
diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll
index d9ab2f7d1f5fb..6ee16149e0939 100644
--- a/llvm/test/CodeGen/X86/tail-opts.ll
+++ b/llvm/test/CodeGen/X86/tail-opts.ll
@@ -239,70 +239,73 @@ bb30:
 define fastcc void @c_expand_expr_stmt(ptr %expr) nounwind {
 ; CHECK-LABEL: c_expand_expr_stmt:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne .LBB3_9
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movzbl 0, %ebx
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    movzbl 0, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    jne .LBB3_8
 ; CHECK-NEXT:  # %bb.2: # %bb.i
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    je .LBB3_8
 ; CHECK-NEXT:  # %bb.3: # %lvalue_p.exit
-; CHECK-NEXT:    movq 0, %rax
-; CHECK-NEXT:    movzbl (%rax), %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
+; CHECK-NEXT:    movq 0, %rcx
+; CHECK-NEXT:    movzbl (%rcx), %edx
+; CHECK-NEXT:    testl %edx, %edx
 ; CHECK-NEXT:    je .LBB3_10
 ; CHECK-NEXT:  # %bb.4: # %lvalue_p.exit
-; CHECK-NEXT:    cmpl $2, %ecx
+; CHECK-NEXT:    cmpl $2, %edx
 ; CHECK-NEXT:    jne .LBB3_15
 ; CHECK-NEXT:  # %bb.5: # %bb.i1
-; CHECK-NEXT:    movq 32(%rax), %rax
-; CHECK-NEXT:    movzbl 16(%rax), %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
+; CHECK-NEXT:    movq 32(%rcx), %rcx
+; CHECK-NEXT:    movzbl 16(%rcx), %edx
+; CHECK-NEXT:    testl %edx, %edx
 ; CHECK-NEXT:    je .LBB3_13
 ; CHECK-NEXT:  # %bb.6: # %bb.i1
-; CHECK-NEXT:    cmpl $2, %ecx
+; CHECK-NEXT:    cmpl $2, %edx
 ; CHECK-NEXT:    jne .LBB3_15
 ; CHECK-NEXT:  # %bb.7: # %bb.i.i
+; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
 ; CHECK-NEXT:    callq lvalue_p@PLT
 ; CHECK-NEXT:    testl %eax, %eax
-; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    jmp .LBB3_16
 ; CHECK-NEXT:  .LBB3_8: # %bb1
-; CHECK-NEXT:    cmpb $23, %bl
+; CHECK-NEXT:    cmpb $23, %al
 ; CHECK-NEXT:  .LBB3_9: # %bb3
 ; CHECK-NEXT:  .LBB3_15:
-; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    jmp .LBB3_16
 ; CHECK-NEXT:  .LBB3_10: # %bb2.i3
-; CHECK-NEXT:    movq 8(%rax), %rax
-; CHECK-NEXT:    movzbl 16(%rax), %ecx
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpl $23, %ecx
+; CHECK-NEXT:    movq 8(%rcx), %rcx
+; CHECK-NEXT:    movzbl 16(%rcx), %edx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    cmpl $23, %edx
 ; CHECK-NEXT:    je .LBB3_16
 ; CHECK-NEXT:  # %bb.11: # %bb2.i3
-; CHECK-NEXT:    cmpl $16, %ecx
+; CHECK-NEXT:    cmpl $16, %edx
 ; CHECK-NEXT:    je .LBB3_16
 ; CHECK-NEXT:    jmp .LBB3_9
 ; CHECK-NEXT:  .LBB3_13: # %bb2.i.i2
-; CHECK-NEXT:    movq 8(%rax), %rax
-; CHECK-NEXT:    movzbl 16(%rax), %ecx
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpl $16, %ecx
+; CHECK-NEXT:    movq 8(%rcx), %rcx
+; CHECK-NEXT:    movzbl 16(%rcx), %edx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    cmpl $16, %edx
 ; CHECK-NEXT:    je .LBB3_16
 ; CHECK-NEXT:  # %bb.14: # %bb2.i.i2
-; CHECK-NEXT:    cmpl $23, %ecx
+; CHECK-NEXT:    cmpl $23, %edx
 ; CHECK-NEXT:    jne .LBB3_9
 ; CHECK-NEXT:  .LBB3_16: # %lvalue_p.exit4
-; CHECK-NEXT:    testb %bl, %bl
-; CHECK-NEXT:    sete %cl
-; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    orb %cl, %al
 entry:
   %tmp4 = load i8, ptr null, align 8                  ; <i8> [#uses=3]
   switch i8 %tmp4, label %bb3 [
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index 23efdba9bcdeb..bd760f62817a0 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -1363,9 +1363,6 @@ define i32 @irreducibleCFG() #4 {
 ; ENABLE-NEXT:    .cfi_offset %rbp, -16
 ; ENABLE-NEXT:    movq %rsp, %rbp
 ; ENABLE-NEXT:    .cfi_def_cfa_register %rbp
-; ENABLE-NEXT:    pushq %rbx
-; ENABLE-NEXT:    pushq %rax
-; ENABLE-NEXT:    .cfi_offset %rbx, -24
 ; ENABLE-NEXT:    movq _irreducibleCFGf@GOTPCREL(%rip), %rax
 ; ENABLE-NEXT:    cmpb $0, (%rax)
 ; ENABLE-NEXT:    je LBB16_2
@@ -1376,26 +1373,22 @@ define i32 @irreducibleCFG() #4 {
 ; ENABLE-NEXT:  LBB16_2: ## %split
 ; ENABLE-NEXT:    movq _irreducibleCFGb@GOTPCREL(%rip), %rax
 ; ENABLE-NEXT:    cmpl $0, (%rax)
-; ENABLE-NEXT:    je LBB16_3
-; ENABLE-NEXT:  ## %bb.4: ## %for.body4.i
+; ENABLE-NEXT:    je LBB16_4
+; ENABLE-NEXT:  ## %bb.3: ## %for.body4.i
 ; ENABLE-NEXT:    movq _irreducibleCFGa@GOTPCREL(%rip), %rax
 ; ENABLE-NEXT:    movl (%rax), %edi
-; ENABLE-NEXT:    xorl %ebx, %ebx
 ; ENABLE-NEXT:    xorl %eax, %eax
 ; ENABLE-NEXT:    callq _something
-; ENABLE-NEXT:    jmp LBB16_5
-; ENABLE-NEXT:  LBB16_3:
-; ENABLE-NEXT:    xorl %ebx, %ebx
+; ENABLE-NEXT:  LBB16_4: ## %for.inc
+; ENABLE-NEXT:    xorl %eax, %eax
 ; ENABLE-NEXT:    .p2align 4
 ; ENABLE-NEXT:  LBB16_5: ## %for.inc
 ; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
-; ENABLE-NEXT:    incl %ebx
-; ENABLE-NEXT:    cmpl $7, %ebx
+; ENABLE-NEXT:    incl %eax
+; ENABLE-NEXT:    cmpl $7, %eax
 ; ENABLE-NEXT:    jl LBB16_5
 ; ENABLE-NEXT:  ## %bb.6: ## %fn1.exit
 ; ENABLE-NEXT:    xorl %eax, %eax
-; ENABLE-NEXT:    addq $8, %rsp
-; ENABLE-NEXT:    popq %rbx
 ; ENABLE-NEXT:    popq %rbp
 ; ENABLE-NEXT:    retq
 ;
@@ -1406,9 +1399,6 @@ define i32 @irreducibleCFG() #4 {
 ; DISABLE-NEXT:    .cfi_offset %rbp, -16
 ; DISABLE-NEXT:    movq %rsp, %rbp
 ; DISABLE-NEXT:    .cfi_def_cfa_register %rbp
-; DISABLE-NEXT:    pushq %rbx
-; DISABLE-NEXT:    pushq %rax
-; DISABLE-NEXT:    .cfi_offset %rbx, -24
 ; DISABLE-NEXT:    movq _irreducibleCFGf@GOTPCREL(%rip), %rax
 ; DISABLE-NEXT:    cmpb $0, (%rax)
 ; DISABLE-NEXT:    je LBB16_2
@@ -1419,26 +1409,22 @@ define i32 @irreducibleCFG() #4 {
 ; DISABLE-NEXT:  LBB16_2: ## %split
 ; DISABLE-NEXT:    movq _irreducibleCFGb@GOTPCREL(%rip), %rax
 ; DISABLE-NEXT:    cmpl $0, (%rax)
-; DISABLE-NEXT:    je LBB16_3
-; DISABLE-NEXT:  ## %bb.4: ## %for.body4.i
+; DISABLE-NEXT:    je LBB16_4
+; DISABLE-NEXT:  ## %bb.3: ## %for.body4.i
 ; DISABLE-NEXT:    movq _irreducibleCFGa@GOTPCREL(%rip), %rax
 ; DISABLE-NEXT:    movl (%rax), %edi
-; DISABLE-NEXT:    xorl %ebx, %ebx
 ; DISABLE-NEXT:    xorl %eax, %eax
 ; DISABLE-NEXT:    callq _something
-; DISABLE-NEXT:    jmp LBB16_5
-; DISABLE-NEXT:  LBB16_3:
-; DISABLE-NEXT:    xorl %ebx, %ebx
+; DISABLE-NEXT:  LBB16_4: ## %for.inc
+; DISABLE-NEXT:    xorl %eax, %eax
 ; DISABLE-NEXT:    .p2align 4
 ; DISABLE-NEXT:  LBB16_5: ## %for.inc
 ; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
-; DISABLE-NEXT:    incl %ebx
-; DISABLE-NEXT:    cmpl $7, %ebx
+; DISABLE-NEXT:    incl %eax
+; DISABLE-NEXT:    cmpl $7, %eax
 ; DISABLE-NEXT:    jl LBB16_5
 ; DISABLE-NEXT:  ## %bb.6: ## %fn1.exit
 ; DISABLE-NEXT:    xorl %eax, %eax
-; DISABLE-NEXT:    addq $8, %rsp
-; DISABLE-NEXT:    popq %rbx
 ; DISABLE-NEXT:    popq %rbp
 ; DISABLE-NEXT:    retq
 entry:

>From b8bea2d53a212966c5af69f046f59bfcf4f421fe Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Wed, 8 Apr 2026 15:42:41 -0700
Subject: [PATCH 2/4] If PPX is implemented, push/pop doesn't access memory, so
 skip this optimization.

---
 llvm/include/llvm/CodeGen/TargetRegisterInfo.h |  4 +++-
 llvm/lib/CodeGen/RegAllocGreedy.cpp            |  2 +-
 llvm/lib/Target/AArch64/AArch64RegisterInfo.h  |  2 +-
 llvm/lib/Target/X86/X86RegisterInfo.cpp        | 10 ++++++++++
 llvm/lib/Target/X86/X86RegisterInfo.h          |  5 +----
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index ecf027d97cc15..0b218743eb74e 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -1041,7 +1041,9 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo {
   /// Allow the target to override the cost of using a callee-saved register for
   /// the first time. Default value of 0 means we will use a callee-saved
   /// register if it is available.
-  virtual unsigned getCSRFirstUseCost() const { return 0; }
+  virtual unsigned getCSRFirstUseCost(const MachineFunction &MF) const {
+    return 0;
+  }
   /// FIXME: We should deprecate this usage.
   virtual unsigned getCSRCost() const { return 0; }
 
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index f2f1897795890..fbe5f3a332c44 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2449,7 +2449,7 @@ void RAGreedy::initializeCSRCost() {
     }
   } else {
     uint64_t EntryFreq = MBFI->getEntryFreq().getFrequency();
-    CSRCost = BlockFrequency(TRI->getCSRFirstUseCost() * EntryFreq);
+    CSRCost = BlockFrequency(TRI->getCSRFirstUseCost(*MF) * EntryFreq);
     if (CSRCostScale < 100)
       CSRCost *= BranchProbability(CSRCostScale, 100);
     else
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index ac58d8d6b1cc7..c9d630be93bed 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -59,7 +59,7 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
     // cold path instead of using a callee-saved register.
     return 5;
   }
-  unsigned getCSRFirstUseCost() const override {
+  unsigned getCSRFirstUseCost(const MachineFunction &MF) const override {
     // The cost of 2 means push and pop for each CSR.
     return 2;
   }
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index c84e0f441a459..fb940aaa8ab71 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1281,3 +1281,13 @@ bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const {
     return true;
   }
 }
+
+unsigned X86RegisterInfo::getCSRFirstUseCost(const MachineFunction &MF) const {
+  // If PPX is implemented, push/pop pairs doesn't access memory.
+  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+  if (ST.is64Bit() && ST.hasPPX())
+    return 0;
+
+  // push + pop.
+  return 2;
+}
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 134089ed91f8d..1418e2892768a 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -183,10 +183,7 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
     return true;
   }
 
-  unsigned getCSRFirstUseCost() const override {
-    // push + pop.
-    return 2;
-  }
+  unsigned getCSRFirstUseCost(const MachineFunction &MF) const override;
 };
 
 } // End llvm namespace

>From 4951a609f9d30dfa5ba91b1ca1c59a38c41588c2 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Fri, 24 Apr 2026 15:02:06 -0700
Subject: [PATCH 3/4] Fix the new test case.

---
 llvm/test/CodeGen/X86/apx/pr191368.ll | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/llvm/test/CodeGen/X86/apx/pr191368.ll b/llvm/test/CodeGen/X86/apx/pr191368.ll
index 20f3dae254598..62fc0dc18a447 100644
--- a/llvm/test/CodeGen/X86/apx/pr191368.ll
+++ b/llvm/test/CodeGen/X86/apx/pr191368.ll
@@ -276,13 +276,12 @@ define i32 @pr190962(ptr %a, ptr %b, ptr %c, i64 %d, i64 %e, i64 %f) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    pushq %r14
-; CHECK-NEXT:    pushq %r13
 ; CHECK-NEXT:    pushq %r12
 ; CHECK-NEXT:    pushq %rsi
 ; CHECK-NEXT:    pushq %rdi
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    subq $56, %rsp
+; CHECK-NEXT:    subq $64, %rsp
 ; CHECK-NEXT:    movq %r9, %rdi
 ; CHECK-NEXT:    movq %rdx, %rbx
 ; CHECK-NEXT:    movq %rcx, %rsi
@@ -292,19 +291,20 @@ define i32 @pr190962(ptr %a, ptr %b, ptr %c, i64 %d, i64 %e, i64 %f) nounwind {
 ; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    je .LBB3_3
 ; CHECK-NEXT:  # %bb.1: # %l1
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r13
-; CHECK-NEXT:    movq 0, %r15
-; CHECK-NEXT:    callq f2
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r15
 ; CHECK-NEXT:    movq 0, %r12
+; CHECK-NEXT:    callq f2
+; CHECK-NEXT:    movq 0, %rcx
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    orq %r13, %r15
+; CHECK-NEXT:    orq %r12, %r15
 ; CHECK-NEXT:    jne .LBB3_2
 ; CHECK-NEXT:  # %bb.4: # %l2
-; CHECK-NEXT:    movq %rax, %r14
+; CHECK-NEXT:    movq %rcx, %r15
 ; CHECK-NEXT:    movq %rbx, %rcx
+; CHECK-NEXT:    movq %rax, %rbx
 ; CHECK-NEXT:    callq f4
 ; CHECK-NEXT:    movl $1, %ebp
-; CHECK-NEXT:    orq %r12, %r14
+; CHECK-NEXT:    orq %r15, %rbx
 ; CHECK-NEXT:    je .LBB3_5
 ; CHECK-NEXT:  .LBB3_2: # %common.ret1.sink.split
 ; CHECK-NEXT:    callq f3
@@ -316,23 +316,21 @@ define i32 @pr190962(ptr %a, ptr %b, ptr %c, i64 %d, i64 %e, i64 %f) nounwind {
 ; CHECK-NEXT:    callq f3
 ; CHECK-NEXT:  .LBB3_3: # %common.ret1
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    addq $56, %rsp
+; CHECK-NEXT:    addq $64, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    popq %rdi
 ; CHECK-NEXT:    popq %rsi
 ; CHECK-NEXT:    popq %r12
-; CHECK-NEXT:    popq %r13
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %r15
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB3_5: # %l3
 ; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    je .LBB3_3
 ; CHECK-NEXT:  # %bb.6: # %l4
 ; CHECK-NEXT:    movl $0, (%rsi)
-; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq %r14, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    xorl %ecx, %ecx

>From 90799b047f11cdd2f41a63ecdc5f0e3a685ed454 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Thu, 14 May 2026 20:40:54 +0000
Subject: [PATCH 4/4] Fix comment.

---
 llvm/lib/Target/X86/X86RegisterInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index fb940aaa8ab71..3bc85ef072fb7 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1283,7 +1283,7 @@ bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const {
 }
 
 unsigned X86RegisterInfo::getCSRFirstUseCost(const MachineFunction &MF) const {
-  // If PPX is implemented, push/pop pairs doesn't access memory.
+  // If PPX is implemented, push/pop pairs don't access memory.
   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
   if (ST.is64Bit() && ST.hasPPX())
     return 0;



More information about the cfe-commits mailing list