[clang] [llvm] [RegAlloc] [X86] Enable callee saved register optimization for x86 (PR #188609)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 17 09:30:41 PDT 2026
https://github.com/weiguozhi updated https://github.com/llvm/llvm-project/pull/188609
>From 16c7e2709fa2217137e9c0bd331bfaa92ab6c8ac Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Fri, 6 Mar 2026 11:00:11 -0800
Subject: [PATCH 1/4] [RegAlloc] [X86] Enable callee saved register
optimization for x86
Enable callee saved register optimization implemented in
RAGreedy::tryAssignCSRFirstTime() for x86. It can replace save/restore
instructions in prologue/epilogue with register spill/reload in cold
blocks or register splits.
---
clang/test/Frontend/stack-layout-remark.c | 20 +--
llvm/lib/CodeGen/RegAllocGreedy.cpp | 2 +-
llvm/lib/Target/X86/X86RegisterInfo.h | 5 +
.../CodeGen/MLRegAlloc/interactive-mode.ll | 2 +-
.../X86/2007-08-09-IllegalX86-64Asm.ll | 57 ++++----
.../CodeGen/X86/2007-11-30-LoadFolding-Bug.ll | 36 ++---
llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll | 2 +-
llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll | 66 ++++-----
llvm/test/CodeGen/X86/atom-fixup-lea2.ll | 2 +-
llvm/test/CodeGen/X86/block-placement.ll | 4 +-
.../CodeGen/X86/callbr-asm-branch-folding.ll | 5 +-
llvm/test/CodeGen/X86/fptosi-sat-scalar.ll | 120 ++++++++--------
llvm/test/CodeGen/X86/fptoui-sat-scalar.ll | 136 +++++++++---------
.../X86/indirect-branch-tracking-eh.ll | 1 -
llvm/test/CodeGen/X86/no-split-size.ll | 2 +-
llvm/test/CodeGen/X86/ragreedy-bug.ll | 2 +-
llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 60 ++++----
llvm/test/CodeGen/X86/sjlj-eh.ll | 2 +-
.../CodeGen/X86/speculative-load-hardening.ll | 133 ++++++++---------
llvm/test/CodeGen/X86/split-reg-with-hint.ll | 2 +-
.../CodeGen/X86/statepoint-vreg-details.ll | 6 +-
.../CodeGen/X86/statepoint-vreg-invoke.ll | 4 +-
.../X86/tail-dup-merge-loop-headers.ll | 2 +-
llvm/test/CodeGen/X86/tail-opts.ll | 63 ++++----
llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 38 ++---
25 files changed, 382 insertions(+), 390 deletions(-)
diff --git a/clang/test/Frontend/stack-layout-remark.c b/clang/test/Frontend/stack-layout-remark.c
index b0ed03c80f24a..461312bd047c1 100644
--- a/clang/test/Frontend/stack-layout-remark.c
+++ b/clang/test/Frontend/stack-layout-remark.c
@@ -135,26 +135,28 @@ extern void use_dot_vector(struct Array *data);
// O0-DEBUG: Function: do_work
// O0-DEBUG-NEXT: Offset: [SP-4], Type: Variable, Align: 4, Size: 4
// O0-DEBUG-NEXT: Offset: [SP-16], Type: Variable, Align: 8, Size: 8
-// O0-DEBUG-NEXT: A @ {{.*}}stack-layout-remark.c:[[# @LINE + 20]]
+// O0-DEBUG-NEXT: A @ {{.*}}stack-layout-remark.c:[[# @LINE + 22]]
// O0-DEBUG-NEXT: Offset: [SP-24], Type: Variable, Align: 8, Size: 8
-// O0-DEBUG-NEXT: B @ {{.*}}stack-layout-remark.c:[[# @LINE + 18]]
+// O0-DEBUG-NEXT: B @ {{.*}}stack-layout-remark.c:[[# @LINE + 20]]
// O0-DEBUG-NEXT: Offset: [SP-32], Type: Variable, Align: 8, Size: 8
-// O0-DEBUG-NEXT: out @ {{.*}}stack-layout-remark.c:[[# @LINE + 16]]
+// O0-DEBUG-NEXT: out @ {{.*}}stack-layout-remark.c:[[# @LINE + 18]]
// O0-DEBUG-NEXT: Offset: [SP-36], Type: Variable, Align: 4, Size: 4
-// O0-DEBUG-NEXT: len @ {{.*}}stack-layout-remark.c:[[# @LINE + 19]]
+// O0-DEBUG-NEXT: len @ {{.*}}stack-layout-remark.c:[[# @LINE + 21]]
// O0-DEBUG-NEXT: Offset: [SP-48], Type: Variable, Align: 8, Size: 8
-// O0-DEBUG-NEXT: AB @ {{.*}}stack-layout-remark.c:[[# @LINE + 18]]
+// O0-DEBUG-NEXT: AB @ {{.*}}stack-layout-remark.c:[[# @LINE + 20]]
// O0-DEBUG-NEXT: Offset: [SP-52], Type: Variable, Align: 4, Size: 4
-// O0-DEBUG-NEXT: sum @ {{.*}}stack-layout-remark.c:[[# @LINE + 32]]
+// O0-DEBUG-NEXT: sum @ {{.*}}stack-layout-remark.c:[[# @LINE + 34]]
// O0-DEBUG-NEXT: Offset: [SP-56], Type: Variable, Align: 4, Size: 4
-// O0-DEBUG-NEXT: i @ {{.*}}stack-layout-remark.c:[[# @LINE + 31]]
+// O0-DEBUG-NEXT: i @ {{.*}}stack-layout-remark.c:[[# @LINE + 33]]
// O3-DEBUG: Function: do_work
// O3-DEBUG-NEXT: Offset: [SP-8], Type: Spill, Align: 16, Size: 8
// O3-DEBUG-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
-// O3-DEBUG-NEXT: Offset: [SP-24], Type: Spill, Align: 16, Size: 8
+// O3-DEBUG-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
+// O3-DEBUG-NEXT: B @ {{.*}}stack-layout-remark.c:[[# @LINE + 4]]
// O3-DEBUG-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
-// O3-DEBUG-NEXT: Offset: [SP-40], Type: Spill, Align: 16, Size: 8
+// O3-DEBUG-NEXT: A @ {{.*}}stack-layout-remark.c:[[# @LINE + 2]]
+// O3-DEBUG-NEXT: Offset: [SP-40], Type: Spill, Align: 8, Size: 8
int do_work(struct Array *A, struct Array *B, struct Result *out) {
if (!A || !B)
return -1;
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 6b8a9b8190f9a..f2f1897795890 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -118,7 +118,7 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
static cl::opt<unsigned> CSRCostScale(
"regalloc-csr-cost-scale",
cl::desc("Scale for the callee-saved register cost, in percentage."),
- cl::init(80), cl::Hidden);
+ cl::init(30), cl::Hidden);
static cl::opt<unsigned long> GrowRegionComplexityBudget(
"grow-region-complexity-budget",
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index e646591663aca..134089ed91f8d 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -182,6 +182,11 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
}
+
+ unsigned getCSRFirstUseCost() const override {
+ // push + pop.
+ return 2;
+ }
};
} // End llvm namespace
diff --git a/llvm/test/CodeGen/MLRegAlloc/interactive-mode.ll b/llvm/test/CodeGen/MLRegAlloc/interactive-mode.ll
index 51af5891c4e0f..881920e1f6575 100644
--- a/llvm/test/CodeGen/MLRegAlloc/interactive-mode.ll
+++ b/llvm/test/CodeGen/MLRegAlloc/interactive-mode.ll
@@ -7,7 +7,7 @@
; RUN: cp %S/Inputs/interactive_main.py %t.rundir
; RUN: %python %t.rundir/interactive_main.py %t.channel-basename \
; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=release -interactive-model-runner-echo-reply \
-; RUN: -regalloc-evict-interactive-channel-base=%t.channel-basename %S/Inputs/two-large-fcts.ll -o /dev/null | FileCheck %s
+; RUN: -regalloc-evict-interactive-channel-base=%t.channel-basename -regalloc-csr-cost-scale=0 %S/Inputs/two-large-fcts.ll -o /dev/null | FileCheck %s
;; Make sure we see both contexts. Also sanity-check that the advice is the
;; expected one - the index of the first legal register
diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index 7bdc4e19a1cf6..e264e74fee9d3 100644
--- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -24,20 +24,14 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: pushq %r15
-; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: pushq %r12
-; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: subq $32, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset %rbx, -48
-; CHECK-NEXT: .cfi_offset %r12, -40
-; CHECK-NEXT: .cfi_offset %r14, -32
-; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movq %rdi, %r14
@@ -80,7 +74,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: movzbl %sil, %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: divb %dl
-; CHECK-NEXT: movl %eax, %r15d
+; CHECK-NEXT: ## kill: def $al killed $al def $eax
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: jne LBB0_12
; CHECK-NEXT: jmp LBB0_14
@@ -103,32 +97,34 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: callq _feraiseexcept
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
-; CHECK-NEXT: xorl %r15d, %r15d
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %sil, %sil
; CHECK-NEXT: je LBB0_14
; CHECK-NEXT: LBB0_12: ## %cond_false.i
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: je LBB0_14
; CHECK-NEXT: ## %bb.13: ## %cond_next17.i
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: divb %dl
-; CHECK-NEXT: movzbl %ah, %ebx
+; CHECK-NEXT: movzbl %ah, %ebp
; CHECK-NEXT: jmp LBB0_18
; CHECK-NEXT: LBB0_14: ## %cond_true.i200
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: jne LBB0_17
; CHECK-NEXT: ## %bb.16: ## %cond_true14.i
; CHECK-NEXT: movl $4, %edi
; CHECK-NEXT: callq _feraiseexcept
; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit
-; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: xorl %ebp, %ebp
; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: callq *224(%rax)
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: je LBB0_21
; CHECK-NEXT: ## %bb.19: ## %cond_true61
-; CHECK-NEXT: movl %eax, %ebp
+; CHECK-NEXT: movl %eax, %ebx
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
@@ -143,7 +139,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT: movl %ebp, %edx
+; CHECK-NEXT: movl %ebx, %edx
; CHECK-NEXT: callq *232(%rax)
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: jne LBB0_27
@@ -153,43 +149,42 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: je LBB0_27
; CHECK-NEXT: ## %bb.22: ## %cond_next97
-; CHECK-NEXT: movq %rax, %r14
-; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12
-; CHECK-NEXT: movq (%r12), %rax
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r14
+; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq 200(%rax), %rdi
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq *304(%rdi)
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.23: ## %cond_next135
-; CHECK-NEXT: movb %r15b, 16(%rax)
-; CHECK-NEXT: movq %rax, 24(%r14)
-; CHECK-NEXT: movq (%r12), %rax
+; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-NEXT: movb %cl, 16(%rax)
+; CHECK-NEXT: movq %rax, 24(%rbx)
+; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq 200(%rax), %rdi
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq *304(%rdi)
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.24: ## %cond_next182
-; CHECK-NEXT: movb %bl, 16(%rax)
-; CHECK-NEXT: movq %rax, 32(%r14)
-; CHECK-NEXT: movq %r14, %rax
+; CHECK-NEXT: movb %bpl, 16(%rax)
+; CHECK-NEXT: movq %rax, 32(%rbx)
+; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: jmp LBB0_28
; CHECK-NEXT: LBB0_25: ## %cond_true113
-; CHECK-NEXT: decq (%r14)
+; CHECK-NEXT: decq (%rbx)
; CHECK-NEXT: jne LBB0_27
; CHECK-NEXT: ## %bb.26: ## %cond_true126
-; CHECK-NEXT: movq 8(%r14), %rax
-; CHECK-NEXT: movq %r14, %rdi
+; CHECK-NEXT: movq 8(%rbx), %rax
+; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: callq *48(%rax)
; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r14
-; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 6541693776099..65c291ab5a748 100644
--- a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -14,13 +14,12 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: movl $1, %ebx
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb.i5
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: addl %ebx, %ebx
+; CHECK-NEXT: addl %esi, %esi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: jne .LBB0_1
@@ -34,17 +33,17 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
; CHECK-NEXT: andl $1, %ebp
; CHECK-NEXT: xorpd %xmm0, %xmm0
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_7: # %bb.i28.i
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cvttsd2si %xmm1, %edi
-; CHECK-NEXT: cmpl %edx, %edi
-; CHECK-NEXT: cmovgel %eax, %edi
-; CHECK-NEXT: addl $2, %ecx
+; CHECK-NEXT: cvttsd2si %xmm1, %ecx
+; CHECK-NEXT: cmpl %edx, %ecx
+; CHECK-NEXT: cmovgel %eax, %ecx
+; CHECK-NEXT: addl $2, %edi
; CHECK-NEXT: xorps %xmm2, %xmm2
-; CHECK-NEXT: cvtsi2sd %edi, %xmm2
+; CHECK-NEXT: cvtsi2sd %ecx, %xmm2
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: subsd %xmm2, %xmm1
; CHECK-NEXT: mulsd %xmm0, %xmm1
@@ -54,9 +53,11 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
; CHECK-NEXT: movl $0, 0
; CHECK-NEXT: je .LBB0_9
; CHECK-NEXT: # %bb.10: # %mp_sqrt_init.exit
+; CHECK-NEXT: movl %esi, (%esp) # 4-byte Spill
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movl %edx, %edi
-; CHECK-NEXT: movl %esi, %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT: movl %ebx, %edx
; CHECK-NEXT: calll mp_mul_csqu@PLT
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movl $-1, %edx
@@ -66,11 +67,11 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
; CHECK-NEXT: calll rdft@PLT
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: movl %edi, (%esp) # 4-byte Spill
+; CHECK-NEXT: movl %edi, %esi
; CHECK-NEXT: movl %edi, %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: pushl %edi
-; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl $0
; CHECK-NEXT: calll mp_mul_d2i@PLT
; CHECK-NEXT: addl $12, %esp
@@ -90,7 +91,6 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
; CHECK-NEXT: jmp .LBB0_9
; CHECK-NEXT: .LBB0_11: # %cond_false.i
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: movl (%esp), %esi # 4-byte Reload
; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
; CHECK-NEXT: pushl $0
@@ -98,10 +98,10 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
; CHECK-NEXT: addl $8, %esp
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movl %esi, %edx
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
-; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: calll mp_add@PLT
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: xorl %ecx, %ecx
@@ -113,7 +113,7 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movl %esi, %edx
-; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl $0
; CHECK-NEXT: calll mp_round@PLT
; CHECK-NEXT: addl $8, %esp
@@ -121,7 +121,7 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; CHECK-NEXT: calll mp_mul_d2i@PLT
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: jmp .LBB0_4
diff --git a/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index b32afdc2214e0..78b369599cd2f 100644
--- a/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts -regalloc-csr-cost-scale=0 | FileCheck %s
%struct.DBC_t = type { i32, ptr, i16, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, ptr, i8, i16, ptr, i16 }
%struct.DRVOPT = type { i16, i32, i8, ptr }
diff --git a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
index 254f824379d56..dde3e36fa1a2d 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
@@ -32,18 +32,18 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp)
; CHECK-NEXT: ldtilecfg (%rsp)
-; CHECK-NEXT: movl $32, %r14d
-; CHECK-NEXT: movl $buf+2048, %r15d
-; CHECK-NEXT: tileloadd (%r15,%r14), %tmm5
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: movl $32, %eax
+; CHECK-NEXT: movl $buf+2048, %r14d
+; CHECK-NEXT: tileloadd (%r14,%rax), %tmm5
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.true
-; CHECK-NEXT: movl $buf, %eax
-; CHECK-NEXT: movw $8, %cx
-; CHECK-NEXT: tileloadd (%rax,%r14), %tmm0
-; CHECK-NEXT: movl $buf+1024, %eax
-; CHECK-NEXT: tileloadd (%rax,%r14), %tmm1
+; CHECK-NEXT: movl $buf, %ecx
+; CHECK-NEXT: movw $8, %dx
+; CHECK-NEXT: tileloadd (%rcx,%rax), %tmm0
+; CHECK-NEXT: movl $buf+1024, %ecx
+; CHECK-NEXT: tileloadd (%rcx,%rax), %tmm1
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: tdpbssd %tmm1, %tmm0, %tmm5
@@ -56,22 +56,23 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: tileloadd 64(%rsp,%rax), %tmm6 # 1024-byte Folded Reload
; CHECK-NEXT: jmp .LBB0_3
; CHECK-NEXT: .LBB0_2: # %if.false
-; CHECK-NEXT: movl $buf, %eax
-; CHECK-NEXT: movw $8, %cx
-; CHECK-NEXT: tileloadd (%rax,%r14), %tmm2
-; CHECK-NEXT: movl $buf+1024, %eax
-; CHECK-NEXT: tileloadd (%rax,%r14), %tmm3
+; CHECK-NEXT: movl $buf, %ecx
+; CHECK-NEXT: movw $8, %dx
+; CHECK-NEXT: tileloadd (%rcx,%rax), %tmm2
+; CHECK-NEXT: movl $buf+1024, %ecx
+; CHECK-NEXT: tileloadd (%rcx,%rax), %tmm3
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: tdpbssd %tmm3, %tmm2, %tmm5
; CHECK-NEXT: tilestored %tmm5, 64(%rsp,%rax) # 1024-byte Folded Spill
+; CHECK-NEXT: movl $32, %r15d
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq foo
; CHECK-NEXT: ldtilecfg (%rsp)
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tileloadd 64(%rsp,%rax), %tmm6 # 1024-byte Folded Reload
-; CHECK-NEXT: tilestored %tmm6, (%r15,%r14)
+; CHECK-NEXT: tilestored %tmm6, (%r14,%r15)
; CHECK-NEXT: .LBB0_3: # %exit
; CHECK-NEXT: movl $buf, %eax
; CHECK-NEXT: movl $32, %ecx
@@ -118,22 +119,22 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; EGPR-NEXT: movw %bx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x5c,0x24,0x12]
; EGPR-NEXT: movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x30]
; EGPR-NEXT: ldtilecfg (%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x04,0x24]
-; EGPR-NEXT: movl $32, %r14d # encoding: [0x41,0xbe,0x20,0x00,0x00,0x00]
-; EGPR-NEXT: movl $buf+2048, %r15d # encoding: [0x41,0xbf,A,A,A,A]
+; EGPR-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; EGPR-NEXT: movl $buf+2048, %r14d # encoding: [0x41,0xbe,A,A,A,A]
; EGPR-NEXT: # fixup A - offset: 2, value: buf+2048, kind: FK_Data_4
-; EGPR-NEXT: tileloadd (%r15,%r14), %tmm5 # EVEX TO VEX Compression encoding: [0xc4,0x82,0x7b,0x4b,0x2c,0x37]
-; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; EGPR-NEXT: testb %al, %al # encoding: [0x84,0xc0]
+; EGPR-NEXT: tileloadd (%r14,%rax), %tmm5 # EVEX TO VEX Compression encoding: [0xc4,0xc2,0x7b,0x4b,0x2c,0x06]
+; EGPR-NEXT: xorl %ecx, %ecx # encoding: [0x31,0xc9]
+; EGPR-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
; EGPR-NEXT: jne .LBB0_2 # encoding: [0x75,A]
; EGPR-NEXT: # fixup A - offset: 1, value: .LBB0_2, kind: FK_PCRel_1
; EGPR-NEXT: # %bb.1: # %if.true
-; EGPR-NEXT: movl $buf, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT: movl $buf, %ecx # encoding: [0xb9,A,A,A,A]
; EGPR-NEXT: # fixup A - offset: 1, value: buf, kind: FK_Data_4
-; EGPR-NEXT: movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00]
-; EGPR-NEXT: tileloadd (%rax,%r14), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x30]
-; EGPR-NEXT: movl $buf+1024, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT: movw $8, %dx # encoding: [0x66,0xba,0x08,0x00]
+; EGPR-NEXT: tileloadd (%rcx,%rax), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x01]
+; EGPR-NEXT: movl $buf+1024, %ecx # encoding: [0xb9,A,A,A,A]
; EGPR-NEXT: # fixup A - offset: 1, value: buf+1024, kind: FK_Data_4
-; EGPR-NEXT: tileloadd (%rax,%r14), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x30]
+; EGPR-NEXT: tileloadd (%rcx,%rax), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x0c,0x01]
; EGPR-NEXT: movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
; EGPR-NEXT: tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0xac,0x04,0x40,0x04,0x00,0x00]
@@ -151,19 +152,20 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; EGPR-NEXT: jmp .LBB0_3 # encoding: [0xeb,A]
; EGPR-NEXT: # fixup A - offset: 1, value: .LBB0_3, kind: FK_PCRel_1
; EGPR-NEXT: .LBB0_2: # %if.false
-; EGPR-NEXT: movl $buf, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT: movl $buf, %ecx # encoding: [0xb9,A,A,A,A]
; EGPR-NEXT: # fixup A - offset: 1, value: buf, kind: FK_Data_4
-; EGPR-NEXT: movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00]
-; EGPR-NEXT: tileloadd (%rax,%r14), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x14,0x30]
-; EGPR-NEXT: movl $buf+1024, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT: movw $8, %dx # encoding: [0x66,0xba,0x08,0x00]
+; EGPR-NEXT: tileloadd (%rcx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x14,0x01]
+; EGPR-NEXT: movl $buf+1024, %ecx # encoding: [0xb9,A,A,A,A]
; EGPR-NEXT: # fixup A - offset: 1, value: buf+1024, kind: FK_Data_4
-; EGPR-NEXT: tileloadd (%rax,%r14), %tmm3 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x1c,0x30]
+; EGPR-NEXT: tileloadd (%rcx,%rax), %tmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x01]
; EGPR-NEXT: movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
; EGPR-NEXT: tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0xac,0x04,0x40,0x04,0x00,0x00]
; EGPR-NEXT: tdpbssd %tmm3, %tmm2, %tmm5 # encoding: [0xc4,0xe2,0x63,0x5e,0xea]
; EGPR-NEXT: tilestored %tmm5, 64(%rsp,%rax) # 1024-byte Folded Spill
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x6c,0x04,0x40]
+; EGPR-NEXT: movl $32, %r15d # encoding: [0x41,0xbf,0x20,0x00,0x00,0x00]
; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; EGPR-NEXT: callq foo # encoding: [0xe8,A,A,A,A]
@@ -172,7 +174,7 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; EGPR-NEXT: movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
; EGPR-NEXT: tileloadd 64(%rsp,%rax), %tmm6 # 1024-byte Folded Reload
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x74,0x04,0x40]
-; EGPR-NEXT: tilestored %tmm6, (%r15,%r14) # EVEX TO VEX Compression encoding: [0xc4,0x82,0x7a,0x4b,0x34,0x37]
+; EGPR-NEXT: tilestored %tmm6, (%r14,%r15) # EVEX TO VEX Compression encoding: [0xc4,0x82,0x7a,0x4b,0x34,0x3e]
; EGPR-NEXT: .LBB0_3: # %exit
; EGPR-NEXT: movl $buf, %eax # encoding: [0xb8,A,A,A,A]
; EGPR-NEXT: # fixup A - offset: 1, value: buf, kind: FK_Data_4
diff --git a/llvm/test/CodeGen/X86/atom-fixup-lea2.ll b/llvm/test/CodeGen/X86/atom-fixup-lea2.ll
index adea9e1386a90..20519a9974542 100644
--- a/llvm/test/CodeGen/X86/atom-fixup-lea2.ll
+++ b/llvm/test/CodeGen/X86/atom-fixup-lea2.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
; CHECK:%bb.5
-; CHECK-NEXT:leal
+; CHECK-NEXT:addl
; CHECK-NEXT:leal
; CHECK-NEXT:leal
; CHECK-NEXT:movl
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index 1369131413053..df682a5eb6f56 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -1057,6 +1057,7 @@ while.end:
}
declare void @cold_function() cold
+declare i32 @foo_function(i32)
define i32 @test_cold_calls(ptr %a) {
; Test that edges to blocks post-dominated by cold calls are
@@ -1085,7 +1086,8 @@ else:
exit:
%ret = phi i32 [ %val1, %then ], [ %val2, %else ]
- ret i32 %ret
+ %ret2 = call i32 @foo_function(i32 %ret)
+ ret i32 %ret2
}
; Make sure we put landingpads out of the way.
diff --git a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll
index 3d389523dffb3..f90e05203fce5 100644
--- a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll
@@ -17,7 +17,7 @@ define dso_local void @n(ptr %o, i32 %p, i32 %u) nounwind {
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movl %edx, %ebp
-; CHECK-NEXT: movl %esi, %r12d
+; CHECK-NEXT: movl %esi, %r15d
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq c
; CHECK-NEXT: movl %eax, %r14d
@@ -26,12 +26,13 @@ define dso_local void @n(ptr %o, i32 %p, i32 %u) nounwind {
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: jne .LBB0_9
; CHECK-NEXT: # %bb.1: # %if.end
+; CHECK-NEXT: movl %r15d, %eax
; CHECK-NEXT: cmpl $0, e(%rip)
; CHECK-NEXT: # implicit-def: $r15d
; CHECK-NEXT: # implicit-def: $r13d
; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.2: # %if.then4
-; CHECK-NEXT: movslq %r12d, %rdi
+; CHECK-NEXT: movslq %eax, %rdi
; CHECK-NEXT: callq m
; CHECK-NEXT: # implicit-def: $r15d
; CHECK-NEXT: # implicit-def: $r12d
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
index 9b7a43a29a942..0088a92f271c5 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
@@ -706,10 +706,11 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; X86-X87-NEXT: xorl %edx, %edx
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $-8, %ebx
+; X86-X87-NEXT: movl $-8, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jb .LBB8_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: .LBB8_2:
; X86-X87-NEXT: movl $0, %ecx
; X86-X87-NEXT: movl $0, %ebp
@@ -730,13 +731,13 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB8_8
; X86-X87-NEXT: # %bb.7:
-; X86-X87-NEXT: movl %edi, %eax
-; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %ebp, %eax
; X86-X87-NEXT: movl %ecx, %esi
; X86-X87-NEXT: .LBB8_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -744,7 +745,7 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; X86-X87-NEXT: movl $7, %edi
; X86-X87-NEXT: ja .LBB8_10
; X86-X87-NEXT: # %bb.9:
-; X86-X87-NEXT: movl %ebx, %edi
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-X87-NEXT: .LBB8_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
@@ -875,8 +876,8 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $0, %eax
-; X86-X87-NEXT: movl $0, %ebx
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jae .LBB9_1
; X86-X87-NEXT: # %bb.2:
; X86-X87-NEXT: movl $0, %edx
@@ -887,7 +888,6 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; X86-X87-NEXT: .LBB9_5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB9_6:
-; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
@@ -903,13 +903,13 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $-1, %ebp
-; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: movl $-1, %esi
+; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB9_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edx, %ebp
-; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-X87-NEXT: .LBB9_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
@@ -920,8 +920,8 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB9_12
; X86-X87-NEXT: # %bb.11:
-; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-X87-NEXT: movl %edi, %eax
+; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: movl %ebp, %edx
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB9_12:
@@ -939,7 +939,9 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; X86-X87-NEXT: retl $4
; X86-X87-NEXT: .LBB9_1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB9_4
; X86-X87-NEXT: .LBB9_3:
@@ -1715,10 +1717,11 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
; X86-X87-NEXT: xorl %edx, %edx
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $-8, %ebx
+; X86-X87-NEXT: movl $-8, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jb .LBB18_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: .LBB18_2:
; X86-X87-NEXT: movl $0, %ecx
; X86-X87-NEXT: movl $0, %ebp
@@ -1739,13 +1742,13 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB18_8
; X86-X87-NEXT: # %bb.7:
-; X86-X87-NEXT: movl %edi, %eax
-; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %ebp, %eax
; X86-X87-NEXT: movl %ecx, %esi
; X86-X87-NEXT: .LBB18_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1753,7 +1756,7 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
; X86-X87-NEXT: movl $7, %edi
; X86-X87-NEXT: ja .LBB18_10
; X86-X87-NEXT: # %bb.9:
-; X86-X87-NEXT: movl %ebx, %edi
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-X87-NEXT: .LBB18_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
@@ -1884,8 +1887,8 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $0, %eax
-; X86-X87-NEXT: movl $0, %ebx
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jae .LBB19_1
; X86-X87-NEXT: # %bb.2:
; X86-X87-NEXT: movl $0, %edx
@@ -1896,7 +1899,6 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
; X86-X87-NEXT: .LBB19_5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB19_6:
-; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
@@ -1912,13 +1914,13 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $-1, %ebp
-; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: movl $-1, %esi
+; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB19_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edx, %ebp
-; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-X87-NEXT: .LBB19_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
@@ -1929,8 +1931,8 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB19_12
; X86-X87-NEXT: # %bb.11:
-; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-X87-NEXT: movl %edi, %eax
+; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: movl %ebp, %edx
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB19_12:
@@ -1948,7 +1950,9 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
; X86-X87-NEXT: retl $4
; X86-X87-NEXT: .LBB19_1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB19_4
; X86-X87-NEXT: .LBB19_3:
@@ -2857,10 +2861,11 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; X86-X87-NEXT: xorl %edx, %edx
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $-8, %ebx
+; X86-X87-NEXT: movl $-8, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jb .LBB28_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: .LBB28_2:
; X86-X87-NEXT: movl $0, %ecx
; X86-X87-NEXT: movl $0, %ebp
@@ -2881,13 +2886,13 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB28_8
; X86-X87-NEXT: # %bb.7:
-; X86-X87-NEXT: movl %edi, %eax
-; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %ebp, %eax
; X86-X87-NEXT: movl %ecx, %esi
; X86-X87-NEXT: .LBB28_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -2895,7 +2900,7 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; X86-X87-NEXT: movl $7, %edi
; X86-X87-NEXT: ja .LBB28_10
; X86-X87-NEXT: # %bb.9:
-; X86-X87-NEXT: movl %ebx, %edi
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-X87-NEXT: .LBB28_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
@@ -3036,8 +3041,8 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $0, %eax
-; X86-X87-NEXT: movl $0, %ebx
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jae .LBB29_1
; X86-X87-NEXT: # %bb.2:
; X86-X87-NEXT: movl $0, %edx
@@ -3048,7 +3053,6 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; X86-X87-NEXT: .LBB29_5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB29_6:
-; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
@@ -3064,13 +3068,13 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $-1, %ebp
-; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: movl $-1, %esi
+; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB29_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edx, %ebp
-; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-X87-NEXT: .LBB29_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
@@ -3081,8 +3085,8 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB29_12
; X86-X87-NEXT: # %bb.11:
-; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-X87-NEXT: movl %edi, %eax
+; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: movl %ebp, %edx
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB29_12:
@@ -3100,7 +3104,9 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; X86-X87-NEXT: retl $4
; X86-X87-NEXT: .LBB29_1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB29_4
; X86-X87-NEXT: .LBB29_3:
@@ -4188,10 +4194,11 @@ define i100 @test_signed_i100_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: xorl %edx, %edx
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $-8, %ebx
+; X86-X87-NEXT: movl $-8, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jb .LBB38_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: .LBB38_2:
; X86-X87-NEXT: movl $0, %ecx
; X86-X87-NEXT: movl $0, %ebp
@@ -4212,13 +4219,13 @@ define i100 @test_signed_i100_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB38_8
; X86-X87-NEXT: # %bb.7:
-; X86-X87-NEXT: movl %edi, %eax
-; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %ebp, %eax
; X86-X87-NEXT: movl %ecx, %esi
; X86-X87-NEXT: .LBB38_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -4226,7 +4233,7 @@ define i100 @test_signed_i100_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: movl $7, %edi
; X86-X87-NEXT: ja .LBB38_10
; X86-X87-NEXT: # %bb.9:
-; X86-X87-NEXT: movl %ebx, %edi
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-X87-NEXT: .LBB38_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
@@ -4374,8 +4381,8 @@ define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
-; X86-X87-NEXT: movl $0, %eax
-; X86-X87-NEXT: movl $0, %ebx
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jae .LBB39_1
; X86-X87-NEXT: # %bb.2:
; X86-X87-NEXT: movl $0, %edx
@@ -4386,7 +4393,6 @@ define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: .LBB39_5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB39_6:
-; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
@@ -4402,13 +4408,13 @@ define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $-1, %ebp
-; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: movl $-1, %esi
+; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB39_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edx, %ebp
-; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-X87-NEXT: .LBB39_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
@@ -4419,8 +4425,8 @@ define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB39_12
; X86-X87-NEXT: # %bb.11:
-; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-X87-NEXT: movl %edi, %eax
+; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: movl %ebp, %edx
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB39_12:
@@ -4438,7 +4444,9 @@ define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: retl $4
; X86-X87-NEXT: .LBB39_1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB39_4
; X86-X87-NEXT: .LBB39_3:
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
index a074c78d512f5..d074eaf7323d4 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
@@ -676,7 +676,7 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind {
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixunssfti
; X86-X87-NEXT: subl $4, %esp
-; X86-X87-NEXT: xorl %edi, %edi
+; X86-X87-NEXT: xorl %esi, %esi
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
@@ -684,16 +684,16 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind {
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: .LBB8_2:
-; X86-X87-NEXT: movl $0, %esi
+; X86-X87-NEXT: movl $0, %edi
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jb .LBB8_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB8_4:
; X86-X87-NEXT: jb .LBB8_6
; X86-X87-NEXT: # %bb.5:
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-X87-NEXT: .LBB8_6:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -706,20 +706,20 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind {
; X86-X87-NEXT: movl $15, %eax
; X86-X87-NEXT: ja .LBB8_8
; X86-X87-NEXT: # %bb.7:
-; X86-X87-NEXT: movl %edi, %eax
+; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: .LBB8_8:
-; X86-X87-NEXT: movl $-1, %edi
+; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: movl $-1, %ebp
; X86-X87-NEXT: movl $-1, %edx
; X86-X87-NEXT: ja .LBB8_10
; X86-X87-NEXT: # %bb.9:
-; X86-X87-NEXT: movl %ebx, %edi
-; X86-X87-NEXT: movl %esi, %ebp
+; X86-X87-NEXT: movl %ebx, %esi
+; X86-X87-NEXT: movl %edi, %ebp
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-X87-NEXT: .LBB8_10:
; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
-; X86-X87-NEXT: movl %edi, (%ecx)
+; X86-X87-NEXT: movl %esi, (%ecx)
; X86-X87-NEXT: andl $15, %eax
; X86-X87-NEXT: movb %al, 12(%ecx)
; X86-X87-NEXT: movl %ecx, %eax
@@ -820,7 +820,7 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixunssfti
; X86-X87-NEXT: subl $4, %esp
-; X86-X87-NEXT: xorl %edx, %edx
+; X86-X87-NEXT: xorl %edi, %edi
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
@@ -828,19 +828,19 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: .LBB9_2:
-; X86-X87-NEXT: movl $0, %ecx
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jb .LBB9_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-X87-NEXT: .LBB9_4:
; X86-X87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: .LBB9_4:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jb .LBB9_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB9_6:
; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
@@ -850,17 +850,17 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, %ebp
-; X86-X87-NEXT: movl $-1, %edi
+; X86-X87-NEXT: movl $-1, %edx
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB9_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %ebx, %eax
-; X86-X87-NEXT: movl %edx, %ebp
-; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-X87-NEXT: movl %edi, %ebp
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-X87-NEXT: .LBB9_8:
; X86-X87-NEXT: movl %esi, 12(%ecx)
-; X86-X87-NEXT: movl %edi, 8(%ecx)
+; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
@@ -1588,7 +1588,7 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind {
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixunsdfti
; X86-X87-NEXT: subl $4, %esp
-; X86-X87-NEXT: xorl %edi, %edi
+; X86-X87-NEXT: xorl %esi, %esi
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
@@ -1596,16 +1596,16 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind {
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: .LBB18_2:
-; X86-X87-NEXT: movl $0, %esi
+; X86-X87-NEXT: movl $0, %edi
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jb .LBB18_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB18_4:
; X86-X87-NEXT: jb .LBB18_6
; X86-X87-NEXT: # %bb.5:
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-X87-NEXT: .LBB18_6:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -1618,20 +1618,20 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind {
; X86-X87-NEXT: movl $15, %eax
; X86-X87-NEXT: ja .LBB18_8
; X86-X87-NEXT: # %bb.7:
-; X86-X87-NEXT: movl %edi, %eax
+; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: .LBB18_8:
-; X86-X87-NEXT: movl $-1, %edi
+; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: movl $-1, %ebp
; X86-X87-NEXT: movl $-1, %edx
; X86-X87-NEXT: ja .LBB18_10
; X86-X87-NEXT: # %bb.9:
-; X86-X87-NEXT: movl %ebx, %edi
-; X86-X87-NEXT: movl %esi, %ebp
+; X86-X87-NEXT: movl %ebx, %esi
+; X86-X87-NEXT: movl %edi, %ebp
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-X87-NEXT: .LBB18_10:
; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
-; X86-X87-NEXT: movl %edi, (%ecx)
+; X86-X87-NEXT: movl %esi, (%ecx)
; X86-X87-NEXT: andl $15, %eax
; X86-X87-NEXT: movb %al, 12(%ecx)
; X86-X87-NEXT: movl %ecx, %eax
@@ -1732,7 +1732,7 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind {
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixunsdfti
; X86-X87-NEXT: subl $4, %esp
-; X86-X87-NEXT: xorl %edx, %edx
+; X86-X87-NEXT: xorl %edi, %edi
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
@@ -1740,19 +1740,19 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind {
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: .LBB19_2:
-; X86-X87-NEXT: movl $0, %ecx
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jb .LBB19_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-X87-NEXT: .LBB19_4:
; X86-X87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: .LBB19_4:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jb .LBB19_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB19_6:
; X86-X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
@@ -1762,17 +1762,17 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind {
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, %ebp
-; X86-X87-NEXT: movl $-1, %edi
+; X86-X87-NEXT: movl $-1, %edx
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB19_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %ebx, %eax
-; X86-X87-NEXT: movl %edx, %ebp
-; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-X87-NEXT: movl %edi, %ebp
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-X87-NEXT: .LBB19_8:
; X86-X87-NEXT: movl %esi, 12(%ecx)
-; X86-X87-NEXT: movl %edi, 8(%ecx)
+; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
@@ -2627,7 +2627,7 @@ define i100 @test_unsigned_i100_f16(half %f) nounwind {
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixunssfti
; X86-X87-NEXT: subl $4, %esp
-; X86-X87-NEXT: xorl %edi, %edi
+; X86-X87-NEXT: xorl %esi, %esi
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
@@ -2635,16 +2635,16 @@ define i100 @test_unsigned_i100_f16(half %f) nounwind {
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: .LBB28_2:
-; X86-X87-NEXT: movl $0, %esi
+; X86-X87-NEXT: movl $0, %edi
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jb .LBB28_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB28_4:
; X86-X87-NEXT: jb .LBB28_6
; X86-X87-NEXT: # %bb.5:
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-X87-NEXT: .LBB28_6:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -2657,20 +2657,20 @@ define i100 @test_unsigned_i100_f16(half %f) nounwind {
; X86-X87-NEXT: movl $15, %eax
; X86-X87-NEXT: ja .LBB28_8
; X86-X87-NEXT: # %bb.7:
-; X86-X87-NEXT: movl %edi, %eax
+; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: .LBB28_8:
-; X86-X87-NEXT: movl $-1, %edi
+; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: movl $-1, %ebp
; X86-X87-NEXT: movl $-1, %edx
; X86-X87-NEXT: ja .LBB28_10
; X86-X87-NEXT: # %bb.9:
-; X86-X87-NEXT: movl %ebx, %edi
-; X86-X87-NEXT: movl %esi, %ebp
+; X86-X87-NEXT: movl %ebx, %esi
+; X86-X87-NEXT: movl %edi, %ebp
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-X87-NEXT: .LBB28_10:
; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
-; X86-X87-NEXT: movl %edi, (%ecx)
+; X86-X87-NEXT: movl %esi, (%ecx)
; X86-X87-NEXT: andl $15, %eax
; X86-X87-NEXT: movb %al, 12(%ecx)
; X86-X87-NEXT: movl %ecx, %eax
@@ -2781,7 +2781,7 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixunssfti
; X86-X87-NEXT: subl $4, %esp
-; X86-X87-NEXT: xorl %edx, %edx
+; X86-X87-NEXT: xorl %edi, %edi
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
@@ -2789,19 +2789,19 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: .LBB29_2:
-; X86-X87-NEXT: movl $0, %ecx
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jb .LBB29_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-X87-NEXT: .LBB29_4:
; X86-X87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: .LBB29_4:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jb .LBB29_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB29_6:
; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
@@ -2811,17 +2811,17 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, %ebp
-; X86-X87-NEXT: movl $-1, %edi
+; X86-X87-NEXT: movl $-1, %edx
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB29_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %ebx, %eax
-; X86-X87-NEXT: movl %edx, %ebp
-; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-X87-NEXT: movl %edi, %ebp
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-X87-NEXT: .LBB29_8:
; X86-X87-NEXT: movl %esi, 12(%ecx)
-; X86-X87-NEXT: movl %edi, 8(%ecx)
+; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
@@ -3842,7 +3842,7 @@ define i100 @test_unsigned_i100_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixunsxfti
; X86-X87-NEXT: subl $4, %esp
-; X86-X87-NEXT: xorl %edi, %edi
+; X86-X87-NEXT: xorl %esi, %esi
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
@@ -3850,16 +3850,16 @@ define i100 @test_unsigned_i100_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: .LBB38_2:
-; X86-X87-NEXT: movl $0, %esi
+; X86-X87-NEXT: movl $0, %edi
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jb .LBB38_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB38_4:
; X86-X87-NEXT: jb .LBB38_6
; X86-X87-NEXT: # %bb.5:
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-X87-NEXT: .LBB38_6:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -3872,20 +3872,20 @@ define i100 @test_unsigned_i100_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: movl $15, %eax
; X86-X87-NEXT: ja .LBB38_8
; X86-X87-NEXT: # %bb.7:
-; X86-X87-NEXT: movl %edi, %eax
+; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: .LBB38_8:
-; X86-X87-NEXT: movl $-1, %edi
+; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: movl $-1, %ebp
; X86-X87-NEXT: movl $-1, %edx
; X86-X87-NEXT: ja .LBB38_10
; X86-X87-NEXT: # %bb.9:
-; X86-X87-NEXT: movl %ebx, %edi
-; X86-X87-NEXT: movl %esi, %ebp
+; X86-X87-NEXT: movl %ebx, %esi
+; X86-X87-NEXT: movl %edi, %ebp
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-X87-NEXT: .LBB38_10:
; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
-; X86-X87-NEXT: movl %edi, (%ecx)
+; X86-X87-NEXT: movl %esi, (%ecx)
; X86-X87-NEXT: andl $15, %eax
; X86-X87-NEXT: movb %al, 12(%ecx)
; X86-X87-NEXT: movl %ecx, %eax
@@ -4000,7 +4000,7 @@ define i128 @test_unsigned_i128_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixunsxfti
; X86-X87-NEXT: subl $4, %esp
-; X86-X87-NEXT: xorl %edx, %edx
+; X86-X87-NEXT: xorl %edi, %edi
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
@@ -4008,19 +4008,19 @@ define i128 @test_unsigned_i128_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: .LBB39_2:
-; X86-X87-NEXT: movl $0, %ecx
+; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: jb .LBB39_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-X87-NEXT: .LBB39_4:
; X86-X87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-X87-NEXT: .LBB39_4:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jb .LBB39_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB39_6:
; X86-X87-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
@@ -4030,17 +4030,17 @@ define i128 @test_unsigned_i128_f80(x86_fp80 %f) nounwind {
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, %ebp
-; X86-X87-NEXT: movl $-1, %edi
+; X86-X87-NEXT: movl $-1, %edx
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB39_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %ebx, %eax
-; X86-X87-NEXT: movl %edx, %ebp
-; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-X87-NEXT: movl %edi, %ebp
+; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-X87-NEXT: .LBB39_8:
; X86-X87-NEXT: movl %esi, 12(%ecx)
-; X86-X87-NEXT: movl %edi, 8(%ecx)
+; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
diff --git a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh.ll b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh.ll
index 8403d4b754d80..d936c7b335486 100644
--- a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh.ll
+++ b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh.ll
@@ -49,7 +49,6 @@
; SJLJ: LBB0_3:
; SJLJ-NEXT: endbr32
-; SJLJ-NEXT: leal
; SJLJ-NEXT: movl
; SJLJ-NEXT: cmpl
; SJLJ-NEXT: jb LBB0_4
diff --git a/llvm/test/CodeGen/X86/no-split-size.ll b/llvm/test/CodeGen/X86/no-split-size.ll
index c1f93acd77dee..d2ae2ffffa819 100644
--- a/llvm/test/CodeGen/X86/no-split-size.ll
+++ b/llvm/test/CodeGen/X86/no-split-size.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -regalloc-csr-cost-scale=0 | FileCheck %s
; @foo is optimized for size. Variables %p2, %p3, %p4, %p5 and %p6 are not split
; in cold blocks.
diff --git a/llvm/test/CodeGen/X86/ragreedy-bug.ll b/llvm/test/CodeGen/X86/ragreedy-bug.ll
index 0d9332ef9cdd1..67e080eaf1d9a 100644
--- a/llvm/test/CodeGen/X86/ragreedy-bug.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy --regalloc-csr-cost-scale=0 | FileCheck %s
; This testing case is reduced from 197.parser prune_match function.
; We make sure register copies are not generated on isupper.exit blocks.
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index 69a6cdb7081eb..60ebb4bd41ebb 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -78,9 +78,10 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: LBB0_8: ## %while.body.preheader
; CHECK-NEXT: imulq $1040, %rdx, %rax ## imm = 0x410
; CHECK-NEXT: movq _syBuf at GOTPCREL(%rip), %rcx
-; CHECK-NEXT: leaq 8(%rcx,%rax), %rdx
+; CHECK-NEXT: leaq 8(%rcx,%rax), %rax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movq _syCTRO at GOTPCREL(%rip), %rax
-; CHECK-NEXT: movl $1, %r13d
+; CHECK-NEXT: movl $1, %ebp
; CHECK-NEXT: movb $1, %cl
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_9: ## %do.body
@@ -89,9 +90,8 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: jne LBB0_9
; CHECK-NEXT: ## %bb.10: ## %do.end
-; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT: xorl %ebp, %ebp
+; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB0_11
; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader
@@ -100,7 +100,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: leaq LJTI0_1(%rip), %r14
; CHECK-NEXT: movb $1, %sil
; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
-; CHECK-NEXT: xorl %r15d, %r15d
+; CHECK-NEXT: xorl %r13d, %r13d
; CHECK-NEXT: jmp LBB0_13
; CHECK-NEXT: LBB0_43: ## %while.cond1037.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
@@ -109,15 +109,15 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_20: ## %while.cond197.backedge
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: decl %r13d
-; CHECK-NEXT: testl %r13d, %r13d
-; CHECK-NEXT: movl %ebp, %r15d
+; CHECK-NEXT: decl %ebp
+; CHECK-NEXT: testl %ebp, %ebp
+; CHECK-NEXT: movl %ebx, %r13d
; CHECK-NEXT: jle LBB0_21
; CHECK-NEXT: LBB0_13: ## %while.body200
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB0_28 Depth 2
; CHECK-NEXT: ## Child Loop BB0_37 Depth 2
-; CHECK-NEXT: leal -268(%rbp), %eax
+; CHECK-NEXT: leal -268(%rbx), %eax
; CHECK-NEXT: cmpl $105, %eax
; CHECK-NEXT: ja LBB0_14
; CHECK-NEXT: ## %bb.55: ## %while.body200
@@ -128,12 +128,12 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: LBB0_25: ## %sw.bb474
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: testb %r12b, %r12b
-; CHECK-NEXT: ## implicit-def: $rbx
+; CHECK-NEXT: ## implicit-def: $r15
; CHECK-NEXT: jne LBB0_33
; CHECK-NEXT: ## %bb.26: ## %do.body479.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: testb %r12b, %r12b
-; CHECK-NEXT: ## implicit-def: $rbx
+; CHECK-NEXT: ## implicit-def: $r15
; CHECK-NEXT: jne LBB0_33
; CHECK-NEXT: ## %bb.27: ## %land.rhs485.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
@@ -142,7 +142,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_31: ## %do.body479.backedge
; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2
-; CHECK-NEXT: leaq 1(%rbx), %rax
+; CHECK-NEXT: leaq 1(%r15), %rax
; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: je LBB0_32
; CHECK-NEXT: LBB0_28: ## %land.rhs485
@@ -152,7 +152,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jne LBB0_54
; CHECK-NEXT: ## %bb.29: ## %cond.true.i.i2780
; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2
-; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rax, %r15
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB0_31
; CHECK-NEXT: ## %bb.30: ## %lor.rhs500
@@ -167,7 +167,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_14: ## %while.body200
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: leal 1(%rbp), %eax
+; CHECK-NEXT: leal 1(%rbx), %eax
; CHECK-NEXT: cmpl $21, %eax
; CHECK-NEXT: ja LBB0_20
; CHECK-NEXT: ## %bb.15: ## %while.body200
@@ -177,7 +177,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jmpq *%rax
; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $1, %ebp
+; CHECK-NEXT: movl $1, %ebx
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: LBB0_44: ## %sw.bb1134
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
@@ -187,22 +187,22 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jb LBB0_54
; CHECK-NEXT: ## %bb.45: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
-; CHECK-NEXT: movl $268, %ebp ## imm = 0x10C
+; CHECK-NEXT: movl $268, %ebx ## imm = 0x10C
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: LBB0_39: ## %sw.bb566
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $20, %ebp
+; CHECK-NEXT: movl $20, %ebx
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: LBB0_19: ## %sw.bb243
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $2, %ebp
+; CHECK-NEXT: movl $2, %ebx
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: LBB0_32: ## %if.end517.loopexitsplit
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: incq %rbx
+; CHECK-NEXT: incq %r15
; CHECK-NEXT: LBB0_33: ## %if.end517
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: leal -324(%r15), %eax
+; CHECK-NEXT: leal -324(%r13), %eax
; CHECK-NEXT: cmpl $59, %eax
; CHECK-NEXT: ja LBB0_34
; CHECK-NEXT: ## %bb.56: ## %if.end517
@@ -212,11 +212,11 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jb LBB0_37
; CHECK-NEXT: LBB0_34: ## %if.end517
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmpl $11, %r15d
+; CHECK-NEXT: cmpl $11, %r13d
; CHECK-NEXT: je LBB0_37
; CHECK-NEXT: ## %bb.35: ## %if.end517
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmpl $24, %r15d
+; CHECK-NEXT: cmpl $24, %r13d
; CHECK-NEXT: je LBB0_37
; CHECK-NEXT: ## %bb.36: ## %if.then532
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
@@ -231,7 +231,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: ## %bb.38: ## %for.cond542.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: movb $0, (%rbx)
+; CHECK-NEXT: movb $0, (%r15)
; CHECK-NEXT: leaq LJTI0_0(%rip), %rdx
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: .p2align 4
@@ -249,12 +249,12 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: LBB0_11:
; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: LBB0_21: ## %while.end1465
-; CHECK-NEXT: incl %ebp
-; CHECK-NEXT: cmpl $16, %ebp
+; CHECK-NEXT: incl %ebx
+; CHECK-NEXT: cmpl $16, %ebx
; CHECK-NEXT: ja LBB0_49
; CHECK-NEXT: ## %bb.22: ## %while.end1465
; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT: btl %ebp, %eax
+; CHECK-NEXT: btl %ebx, %eax
; CHECK-NEXT: jae LBB0_49
; CHECK-NEXT: ## %bb.23:
; CHECK-NEXT: xorl %ebx, %ebx
@@ -294,19 +294,19 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: ## %bb.50: ## %for.body1664.lr.ph
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp ## 4-byte Reload
; CHECK-NEXT: jne LBB0_53
; CHECK-NEXT: ## %bb.51: ## %while.body1679.preheader
; CHECK-NEXT: incl %ebp
-; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_52: ## %while.body1679
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq (%r14), %rdi
+; CHECK-NEXT: movq (%rbx), %rdi
; CHECK-NEXT: callq _fileno
; CHECK-NEXT: incl %ebp
-; CHECK-NEXT: testb %bl, %bl
+; CHECK-NEXT: testb %r14b, %r14b
; CHECK-NEXT: jne LBB0_52
; CHECK-NEXT: LBB0_53: ## %while.cond1683.preheader
; CHECK-NEXT: testb %al, %al
diff --git a/llvm/test/CodeGen/X86/sjlj-eh.ll b/llvm/test/CodeGen/X86/sjlj-eh.ll
index d2dcb35a4908e..2c9aa6fd7d55d 100644
--- a/llvm/test/CodeGen/X86/sjlj-eh.ll
+++ b/llvm/test/CodeGen/X86/sjlj-eh.ll
@@ -1,5 +1,5 @@
; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39439.
-; RUN: llc -mtriple i386-windows-gnu -exception-model sjlj -filetype asm -o - %s -verify-machineinstrs=0 | FileCheck %s
+; RUN: llc -mtriple i386-windows-gnu -regalloc-csr-cost-scale=0 -exception-model sjlj -filetype asm -o - %s -verify-machineinstrs=0 | FileCheck %s
; RUN: llc -mtriple x86_64-windows-gnu -exception-model sjlj -filetype asm -o - %s -verify-machineinstrs=0 | FileCheck %s -check-prefix CHECK-X64
; RUN: llc -mtriple x86_64-linux -exception-model sjlj -filetype asm -o - %s -verify-machineinstrs=0 | FileCheck %s -check-prefix CHECK-X64-LINUX
diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening.ll b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
index 5fd1f77e166d4..5f107818e9fa1 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
@@ -32,87 +32,85 @@ entry:
define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2, ptr %ptr3) speculative_load_hardening {
; X64-LABEL: test_basic_conditions:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %r15
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: pushq %r14
-; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_def_cfa_offset 32
-; X64-NEXT: .cfi_offset %rbx, -32
-; X64-NEXT: .cfi_offset %r14, -24
-; X64-NEXT: .cfi_offset %r15, -16
+; X64-NEXT: .cfi_offset %rbx, -24
+; X64-NEXT: .cfi_offset %r14, -16
; X64-NEXT: movq %rsp, %rax
-; X64-NEXT: movq $-1, %rbx
+; X64-NEXT: movq $-1, %r10
; X64-NEXT: sarq $63, %rax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: jne .LBB1_1
; X64-NEXT: # %bb.2: # %then1
-; X64-NEXT: cmovneq %rbx, %rax
+; X64-NEXT: cmovneq %r10, %rax
; X64-NEXT: testl %esi, %esi
; X64-NEXT: je .LBB1_4
; X64-NEXT: .LBB1_1:
-; X64-NEXT: cmoveq %rbx, %rax
+; X64-NEXT: cmoveq %r10, %rax
; X64-NEXT: .LBB1_8: # %exit
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
-; X64-NEXT: popq %rbx
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .cfi_def_cfa_offset 24
-; X64-NEXT: popq %r14
+; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
-; X64-NEXT: popq %r15
+; X64-NEXT: popq %r14
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
; X64-NEXT: .LBB1_4: # %then2
; X64-NEXT: .cfi_def_cfa_offset 32
-; X64-NEXT: cmovneq %rbx, %rax
+; X64-NEXT: cmovneq %r10, %rax
; X64-NEXT: testl %edx, %edx
; X64-NEXT: je .LBB1_6
; X64-NEXT: # %bb.5: # %else3
-; X64-NEXT: cmoveq %rbx, %rax
-; X64-NEXT: movslq (%r9), %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: leaq (%r8,%rcx,4), %r14
-; X64-NEXT: movl %ecx, (%r8,%rcx,4)
+; X64-NEXT: cmoveq %r10, %rax
+; X64-NEXT: movslq (%r9), %rdx
+; X64-NEXT: orq %rax, %rdx
+; X64-NEXT: leaq (%r8,%rdx,4), %rcx
+; X64-NEXT: movl %edx, (%r8,%rdx,4)
; X64-NEXT: jmp .LBB1_7
; X64-NEXT: .LBB1_6: # %then3
-; X64-NEXT: cmovneq %rbx, %rax
+; X64-NEXT: cmovneq %r10, %rax
; X64-NEXT: movl (%rcx), %ecx
; X64-NEXT: addl (%r8), %ecx
; X64-NEXT: movslq %ecx, %rdi
; X64-NEXT: orq %rax, %rdi
; X64-NEXT: movl (%r8,%rdi,4), %esi
; X64-NEXT: orl %eax, %esi
-; X64-NEXT: movq (%r9), %r14
-; X64-NEXT: orq %rax, %r14
-; X64-NEXT: addl (%r14), %esi
+; X64-NEXT: movq (%r9), %rbx
+; X64-NEXT: orq %rax, %rbx
+; X64-NEXT: addl (%rbx), %esi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: # kill: def $edi killed $edi killed $rdi
; X64-NEXT: orq %rax, %rsp
-; X64-NEXT: movq %r8, %r15
+; X64-NEXT: movq %r8, (%rsp) # 8-byte Spill
+; X64-NEXT: movq $-1, %r14
; X64-NEXT: callq leak@PLT
; X64-NEXT: .Lslh_ret_addr0:
-; X64-NEXT: movq %r15, %r8
+; X64-NEXT: movq %rbx, %rcx
+; X64-NEXT: movq (%rsp), %r8 # 8-byte Reload
; X64-NEXT: movq %rsp, %rax
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
; X64-NEXT: sarq $63, %rax
-; X64-NEXT: cmpq $.Lslh_ret_addr0, %rcx
-; X64-NEXT: cmovneq %rbx, %rax
+; X64-NEXT: cmpq $.Lslh_ret_addr0, %rdx
+; X64-NEXT: cmovneq %r14, %rax
; X64-NEXT: .LBB1_7: # %merge
-; X64-NEXT: movslq (%r14), %rcx
+; X64-NEXT: movslq (%rcx), %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movl $0, (%r8,%rcx,4)
; X64-NEXT: jmp .LBB1_8
;
; X64-LFENCE-LABEL: test_basic_conditions:
; X64-LFENCE: # %bb.0: # %entry
-; X64-LFENCE-NEXT: pushq %r14
-; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT: pushq %rbx
-; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
-; X64-LFENCE-NEXT: pushq %rax
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
+; X64-LFENCE-NEXT: subq $16, %rsp
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
-; X64-LFENCE-NEXT: .cfi_offset %rbx, -24
-; X64-LFENCE-NEXT: .cfi_offset %r14, -16
+; X64-LFENCE-NEXT: .cfi_offset %rbx, -16
; X64-LFENCE-NEXT: testl %edi, %edi
; X64-LFENCE-NEXT: jne .LBB1_6
; X64-LFENCE-NEXT: # %bb.1: # %then1
@@ -125,9 +123,9 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2,
; X64-LFENCE-NEXT: je .LBB1_3
; X64-LFENCE-NEXT: # %bb.4: # %else3
; X64-LFENCE-NEXT: lfence
-; X64-LFENCE-NEXT: movslq (%r9), %rax
-; X64-LFENCE-NEXT: leaq (%r8,%rax,4), %rbx
-; X64-LFENCE-NEXT: movl %eax, (%r8,%rax,4)
+; X64-LFENCE-NEXT: movslq (%r9), %rcx
+; X64-LFENCE-NEXT: leaq (%r8,%rcx,4), %rax
+; X64-LFENCE-NEXT: movl %ecx, (%r8,%rcx,4)
; X64-LFENCE-NEXT: jmp .LBB1_5
; X64-LFENCE-NEXT: .LBB1_3: # %then3
; X64-LFENCE-NEXT: lfence
@@ -138,19 +136,18 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2,
; X64-LFENCE-NEXT: movq (%r9), %rbx
; X64-LFENCE-NEXT: addl (%rbx), %esi
; X64-LFENCE-NEXT: # kill: def $edi killed $edi killed $rdi
-; X64-LFENCE-NEXT: movq %r8, %r14
+; X64-LFENCE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-LFENCE-NEXT: callq leak@PLT
-; X64-LFENCE-NEXT: movq %r14, %r8
+; X64-LFENCE-NEXT: movq %rbx, %rax
+; X64-LFENCE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
; X64-LFENCE-NEXT: .LBB1_5: # %merge
-; X64-LFENCE-NEXT: movslq (%rbx), %rax
+; X64-LFENCE-NEXT: movslq (%rax), %rax
; X64-LFENCE-NEXT: movl $0, (%r8,%rax,4)
; X64-LFENCE-NEXT: .LBB1_6: # %exit
; X64-LFENCE-NEXT: lfence
-; X64-LFENCE-NEXT: addq $8, %rsp
-; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
-; X64-LFENCE-NEXT: popq %rbx
+; X64-LFENCE-NEXT: addq $16, %rsp
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
-; X64-LFENCE-NEXT: popq %r14
+; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 8
; X64-LFENCE-NEXT: retq
entry:
@@ -507,17 +504,11 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbp
; X64-NEXT: .cfi_def_cfa_offset 16
-; X64-NEXT: pushq %r15
-; X64-NEXT: .cfi_def_cfa_offset 24
-; X64-NEXT: pushq %r14
-; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: pushq %rbx
-; X64-NEXT: .cfi_def_cfa_offset 40
-; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 48
-; X64-NEXT: .cfi_offset %rbx, -40
-; X64-NEXT: .cfi_offset %r14, -32
-; X64-NEXT: .cfi_offset %r15, -24
+; X64-NEXT: .cfi_offset %rbx, -24
; X64-NEXT: .cfi_offset %rbp, -16
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq $-1, %rbx
@@ -525,10 +516,10 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
; X64-NEXT: cmpl $41, %edi
; X64-NEXT: jg .LBB4_1
; X64-NEXT: # %bb.2: # %thrower
-; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: cmovgq %rbx, %rax
; X64-NEXT: movslq %edi, %rcx
-; X64-NEXT: movq %rsi, %r15
+; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movl (%rsi,%rcx,4), %ebp
; X64-NEXT: orl %eax, %ebp
; X64-NEXT: movl $4, %edi
@@ -542,7 +533,7 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
; X64-NEXT: cmpq $.Lslh_ret_addr4, %rdx
; X64-NEXT: cmovneq %rbx, %rcx
; X64-NEXT: movl %ebp, (%rax)
-; X64-NEXT: .Ltmp0:
+; X64-NEXT: .Ltmp0: # EH_LABEL
; X64-NEXT: shlq $47, %rcx
; X64-NEXT: movq %rax, %rdi
; X64-NEXT: xorl %esi, %esi
@@ -555,34 +546,32 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr5, %rcx
; X64-NEXT: cmovneq %rbx, %rax
-; X64-NEXT: .Ltmp1:
+; X64-NEXT: .Ltmp1: # EH_LABEL
; X64-NEXT: jmp .LBB4_3
; X64-NEXT: .LBB4_1:
; X64-NEXT: cmovleq %rbx, %rax
; X64-NEXT: .LBB4_3: # %exit
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
-; X64-NEXT: addq $8, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 40
-; X64-NEXT: popq %rbx
-; X64-NEXT: .cfi_def_cfa_offset 32
-; X64-NEXT: popq %r14
+; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 24
-; X64-NEXT: popq %r15
+; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: popq %rbp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
; X64-NEXT: .LBB4_4: # %lpad
; X64-NEXT: .cfi_def_cfa_offset 48
-; X64-NEXT: .Ltmp2:
+; X64-NEXT: .Ltmp2: # EH_LABEL
; X64-NEXT: movq %rsp, %rcx
; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: movl (%rax), %eax
-; X64-NEXT: addl (%r15), %eax
-; X64-NEXT: cltq
-; X64-NEXT: orq %rcx, %rax
-; X64-NEXT: movl (%r14,%rax,4), %edi
+; X64-NEXT: movl (%rax), %edx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT: addl (%rax), %edx
+; X64-NEXT: movslq %edx, %rdx
+; X64-NEXT: orq %rcx, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT: movl (%rax,%rdx,4), %edi
; X64-NEXT: orl %ecx, %edi
; X64-NEXT: shlq $47, %rcx
; X64-NEXT: orq %rcx, %rsp
@@ -616,12 +605,12 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
; X64-LFENCE-NEXT: movl $4, %edi
; X64-LFENCE-NEXT: callq __cxa_allocate_exception@PLT
; X64-LFENCE-NEXT: movl %ebp, (%rax)
-; X64-LFENCE-NEXT: .Ltmp0:
+; X64-LFENCE-NEXT: .Ltmp0: # EH_LABEL
; X64-LFENCE-NEXT: movq %rax, %rdi
; X64-LFENCE-NEXT: xorl %esi, %esi
; X64-LFENCE-NEXT: xorl %edx, %edx
; X64-LFENCE-NEXT: callq __cxa_throw@PLT
-; X64-LFENCE-NEXT: .Ltmp1:
+; X64-LFENCE-NEXT: .Ltmp1: # EH_LABEL
; X64-LFENCE-NEXT: .LBB4_2: # %exit
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: popq %rbx
@@ -633,7 +622,7 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
; X64-LFENCE-NEXT: retq
; X64-LFENCE-NEXT: .LBB4_3: # %lpad
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
-; X64-LFENCE-NEXT: .Ltmp2:
+; X64-LFENCE-NEXT: .Ltmp2: # EH_LABEL
; X64-LFENCE-NEXT: movl (%rax), %eax
; X64-LFENCE-NEXT: addl (%r14), %eax
; X64-LFENCE-NEXT: cltq
diff --git a/llvm/test/CodeGen/X86/split-reg-with-hint.ll b/llvm/test/CodeGen/X86/split-reg-with-hint.ll
index 689f83ff0adc4..85d5dbc123873 100644
--- a/llvm/test/CodeGen/X86/split-reg-with-hint.ll
+++ b/llvm/test/CodeGen/X86/split-reg-with-hint.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -regalloc-csr-cost-scale=0 | FileCheck %s
; %ptr has a hint to %rdi in entry block, it also has a interference with %rdi
; in block if.then. It should be split in cold block if.then.
diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
index 2a0710e3249a6..a7114b3d10434 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
@@ -68,7 +68,7 @@ define void @test_mixed(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(
; CHECK-PREG: $rdx = COPY killed renamable $r14
; CHECK-PREG: dead $ecx = MOV32r0 implicit-def dead $eflags, implicit-def $rcx
; CHECK-PREG: $r8 = COPY killed renamable $rbx
-; CHECK-PREG: CALL64pcrel32 @consume5, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit killed $rcx, implicit killed $r8, implicit-def $rsp, implicit-def $ssp
+; CHECK-PREG: CALL64pcrel32 @consume5, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $rcx, implicit $r8, implicit-def $rsp, implicit-def $ssp
entry:
%safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %a, ptr addrspace(1) null, ptr addrspace(1) %b, ptr addrspace(1) null, ptr addrspace(1) %c)]
@@ -168,7 +168,7 @@ define void @test_gcrelocate_uniqueing(ptr addrspace(1) %ptr) gc "statepoint-exa
; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, killed renamable $rbx, 2, 4278124286, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
; CHECK-PREG: $rdi = COPY renamable $rbx
; CHECK-PREG: $rsi = COPY killed renamable $rbx
-; CHECK-PREG: CALL64pcrel32 @consume2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit killed $rsi, implicit-def $rsp, implicit-def $ssp
+; CHECK-PREG: CALL64pcrel32 @consume2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp
%tok = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) ["deopt" (ptr addrspace(1) %ptr, i32 undef), "gc-live" (ptr addrspace(1) %ptr, ptr addrspace(1) %ptr)]
%a = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %tok, i32 0, i32 0)
@@ -194,7 +194,7 @@ define void @test_gcptr_uniqueing(ptr addrspace(1) %ptr) gc "statepoint-example"
; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, killed renamable $rbx, 2, 4278124286, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
; CHECK-PREG: $rdi = COPY renamable $rbx
; CHECK-PREG: $rsi = COPY killed renamable $rbx
-; CHECK-PREG: CALL64pcrel32 @use1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit killed $rsi, implicit-def $rsp, implicit-def $ssp
+; CHECK-PREG: CALL64pcrel32 @use1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp
%tok = tail call token (i64, i32, ptr, i32, i32, ...)
@llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) ["deopt" (ptr addrspace(1) %ptr, i32 undef), "gc-live" (ptr addrspace(1) %ptr, ptr addrspace(1) %ptr)]
diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
index f08b388c3dc56..b83e58350f66d 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
@@ -199,7 +199,7 @@ define void @test_duplicate_ir_values() gc "statepoint-example" personality ptr
; CHECK-NEXT: renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: $edi = MOV32ri 10
- ; CHECK-NEXT: dead renamable $rbx = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 2, 2, 2, killed renamable $rbx, renamable $rbx, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
+ ; CHECK-NEXT: dead renamable $rbx = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, $edi, 2, 0, 2, 2, 2, 2, killed renamable $rbx, renamable $rbx, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.exceptional_return (landing-pad):
@@ -209,7 +209,7 @@ define void @test_duplicate_ir_values() gc "statepoint-example" personality ptr
; CHECK-NEXT: renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: $edi = MOV32ri -271
- ; CHECK-NEXT: dead renamable $rbx = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 0, 2, 1, killed renamable $rbx, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
+ ; CHECK-NEXT: dead renamable $rbx = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, $edi, 2, 0, 2, 0, 2, 1, killed renamable $rbx, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
entry:
%val1 = load ptr addrspace(1), ptr addrspace(1) undef, align 8
diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
index f3e6d8b5489c3..ad655bf366909 100644
--- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -regalloc-csr-cost-scale=0 | FileCheck %s
; Function Attrs: nounwind uwtable
define void @tail_dup_merge_loops(i32 %a, ptr %b, ptr %c) local_unnamed_addr #0 {
diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll
index d9ab2f7d1f5fb..6ee16149e0939 100644
--- a/llvm/test/CodeGen/X86/tail-opts.ll
+++ b/llvm/test/CodeGen/X86/tail-opts.ll
@@ -239,70 +239,73 @@ bb30:
define fastcc void @c_expand_expr_stmt(ptr %expr) nounwind {
; CHECK-LABEL: c_expand_expr_stmt:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: movzbl 0, %ebx
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: movzbl 0, %eax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: jne .LBB3_8
; CHECK-NEXT: # %bb.2: # %bb.i
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: je .LBB3_8
; CHECK-NEXT: # %bb.3: # %lvalue_p.exit
-; CHECK-NEXT: movq 0, %rax
-; CHECK-NEXT: movzbl (%rax), %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: movq 0, %rcx
+; CHECK-NEXT: movzbl (%rcx), %edx
+; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: je .LBB3_10
; CHECK-NEXT: # %bb.4: # %lvalue_p.exit
-; CHECK-NEXT: cmpl $2, %ecx
+; CHECK-NEXT: cmpl $2, %edx
; CHECK-NEXT: jne .LBB3_15
; CHECK-NEXT: # %bb.5: # %bb.i1
-; CHECK-NEXT: movq 32(%rax), %rax
-; CHECK-NEXT: movzbl 16(%rax), %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: movq 32(%rcx), %rcx
+; CHECK-NEXT: movzbl 16(%rcx), %edx
+; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: je .LBB3_13
; CHECK-NEXT: # %bb.6: # %bb.i1
-; CHECK-NEXT: cmpl $2, %ecx
+; CHECK-NEXT: cmpl $2, %edx
; CHECK-NEXT: jne .LBB3_15
; CHECK-NEXT: # %bb.7: # %bb.i.i
+; CHECK-NEXT: pushq %rax
; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: callq lvalue_p@PLT
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: jmp .LBB3_16
; CHECK-NEXT: .LBB3_8: # %bb1
-; CHECK-NEXT: cmpb $23, %bl
+; CHECK-NEXT: cmpb $23, %al
; CHECK-NEXT: .LBB3_9: # %bb3
; CHECK-NEXT: .LBB3_15:
-; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: jmp .LBB3_16
; CHECK-NEXT: .LBB3_10: # %bb2.i3
-; CHECK-NEXT: movq 8(%rax), %rax
-; CHECK-NEXT: movzbl 16(%rax), %ecx
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpl $23, %ecx
+; CHECK-NEXT: movq 8(%rcx), %rcx
+; CHECK-NEXT: movzbl 16(%rcx), %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpl $23, %edx
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: # %bb.11: # %bb2.i3
-; CHECK-NEXT: cmpl $16, %ecx
+; CHECK-NEXT: cmpl $16, %edx
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: jmp .LBB3_9
; CHECK-NEXT: .LBB3_13: # %bb2.i.i2
-; CHECK-NEXT: movq 8(%rax), %rax
-; CHECK-NEXT: movzbl 16(%rax), %ecx
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpl $16, %ecx
+; CHECK-NEXT: movq 8(%rcx), %rcx
+; CHECK-NEXT: movzbl 16(%rcx), %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpl $16, %edx
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: # %bb.14: # %bb2.i.i2
-; CHECK-NEXT: cmpl $23, %ecx
+; CHECK-NEXT: cmpl $23, %edx
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: .LBB3_16: # %lvalue_p.exit4
-; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: sete %cl
-; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: orb %cl, %al
entry:
%tmp4 = load i8, ptr null, align 8 ; <i8> [#uses=3]
switch i8 %tmp4, label %bb3 [
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index 23efdba9bcdeb..bd760f62817a0 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -1363,9 +1363,6 @@ define i32 @irreducibleCFG() #4 {
; ENABLE-NEXT: .cfi_offset %rbp, -16
; ENABLE-NEXT: movq %rsp, %rbp
; ENABLE-NEXT: .cfi_def_cfa_register %rbp
-; ENABLE-NEXT: pushq %rbx
-; ENABLE-NEXT: pushq %rax
-; ENABLE-NEXT: .cfi_offset %rbx, -24
; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax
; ENABLE-NEXT: cmpb $0, (%rax)
; ENABLE-NEXT: je LBB16_2
@@ -1376,26 +1373,22 @@ define i32 @irreducibleCFG() #4 {
; ENABLE-NEXT: LBB16_2: ## %split
; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax
; ENABLE-NEXT: cmpl $0, (%rax)
-; ENABLE-NEXT: je LBB16_3
-; ENABLE-NEXT: ## %bb.4: ## %for.body4.i
+; ENABLE-NEXT: je LBB16_4
+; ENABLE-NEXT: ## %bb.3: ## %for.body4.i
; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax
; ENABLE-NEXT: movl (%rax), %edi
-; ENABLE-NEXT: xorl %ebx, %ebx
; ENABLE-NEXT: xorl %eax, %eax
; ENABLE-NEXT: callq _something
-; ENABLE-NEXT: jmp LBB16_5
-; ENABLE-NEXT: LBB16_3:
-; ENABLE-NEXT: xorl %ebx, %ebx
+; ENABLE-NEXT: LBB16_4: ## %for.inc
+; ENABLE-NEXT: xorl %eax, %eax
; ENABLE-NEXT: .p2align 4
; ENABLE-NEXT: LBB16_5: ## %for.inc
; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1
-; ENABLE-NEXT: incl %ebx
-; ENABLE-NEXT: cmpl $7, %ebx
+; ENABLE-NEXT: incl %eax
+; ENABLE-NEXT: cmpl $7, %eax
; ENABLE-NEXT: jl LBB16_5
; ENABLE-NEXT: ## %bb.6: ## %fn1.exit
; ENABLE-NEXT: xorl %eax, %eax
-; ENABLE-NEXT: addq $8, %rsp
-; ENABLE-NEXT: popq %rbx
; ENABLE-NEXT: popq %rbp
; ENABLE-NEXT: retq
;
@@ -1406,9 +1399,6 @@ define i32 @irreducibleCFG() #4 {
; DISABLE-NEXT: .cfi_offset %rbp, -16
; DISABLE-NEXT: movq %rsp, %rbp
; DISABLE-NEXT: .cfi_def_cfa_register %rbp
-; DISABLE-NEXT: pushq %rbx
-; DISABLE-NEXT: pushq %rax
-; DISABLE-NEXT: .cfi_offset %rbx, -24
; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax
; DISABLE-NEXT: cmpb $0, (%rax)
; DISABLE-NEXT: je LBB16_2
@@ -1419,26 +1409,22 @@ define i32 @irreducibleCFG() #4 {
; DISABLE-NEXT: LBB16_2: ## %split
; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax
; DISABLE-NEXT: cmpl $0, (%rax)
-; DISABLE-NEXT: je LBB16_3
-; DISABLE-NEXT: ## %bb.4: ## %for.body4.i
+; DISABLE-NEXT: je LBB16_4
+; DISABLE-NEXT: ## %bb.3: ## %for.body4.i
; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax
; DISABLE-NEXT: movl (%rax), %edi
-; DISABLE-NEXT: xorl %ebx, %ebx
; DISABLE-NEXT: xorl %eax, %eax
; DISABLE-NEXT: callq _something
-; DISABLE-NEXT: jmp LBB16_5
-; DISABLE-NEXT: LBB16_3:
-; DISABLE-NEXT: xorl %ebx, %ebx
+; DISABLE-NEXT: LBB16_4: ## %for.inc
+; DISABLE-NEXT: xorl %eax, %eax
; DISABLE-NEXT: .p2align 4
; DISABLE-NEXT: LBB16_5: ## %for.inc
; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1
-; DISABLE-NEXT: incl %ebx
-; DISABLE-NEXT: cmpl $7, %ebx
+; DISABLE-NEXT: incl %eax
+; DISABLE-NEXT: cmpl $7, %eax
; DISABLE-NEXT: jl LBB16_5
; DISABLE-NEXT: ## %bb.6: ## %fn1.exit
; DISABLE-NEXT: xorl %eax, %eax
-; DISABLE-NEXT: addq $8, %rsp
-; DISABLE-NEXT: popq %rbx
; DISABLE-NEXT: popq %rbp
; DISABLE-NEXT: retq
entry:
>From b8bea2d53a212966c5af69f046f59bfcf4f421fe Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Wed, 8 Apr 2026 15:42:41 -0700
Subject: [PATCH 2/4] If PPX is implemented, push/pop doesn't access memory, so
skip this optimization.
---
llvm/include/llvm/CodeGen/TargetRegisterInfo.h | 4 +++-
llvm/lib/CodeGen/RegAllocGreedy.cpp | 2 +-
llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 2 +-
llvm/lib/Target/X86/X86RegisterInfo.cpp | 10 ++++++++++
llvm/lib/Target/X86/X86RegisterInfo.h | 5 +----
5 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index ecf027d97cc15..0b218743eb74e 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -1041,7 +1041,9 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo {
/// Allow the target to override the cost of using a callee-saved register for
/// the first time. Default value of 0 means we will use a callee-saved
/// register if it is available.
- virtual unsigned getCSRFirstUseCost() const { return 0; }
+ virtual unsigned getCSRFirstUseCost(const MachineFunction &MF) const {
+ return 0;
+ }
/// FIXME: We should deprecate this usage.
virtual unsigned getCSRCost() const { return 0; }
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index f2f1897795890..fbe5f3a332c44 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2449,7 +2449,7 @@ void RAGreedy::initializeCSRCost() {
}
} else {
uint64_t EntryFreq = MBFI->getEntryFreq().getFrequency();
- CSRCost = BlockFrequency(TRI->getCSRFirstUseCost() * EntryFreq);
+ CSRCost = BlockFrequency(TRI->getCSRFirstUseCost(*MF) * EntryFreq);
if (CSRCostScale < 100)
CSRCost *= BranchProbability(CSRCostScale, 100);
else
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index ac58d8d6b1cc7..c9d630be93bed 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -59,7 +59,7 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
// cold path instead of using a callee-saved register.
return 5;
}
- unsigned getCSRFirstUseCost() const override {
+ unsigned getCSRFirstUseCost(const MachineFunction &MF) const override {
// The cost of 2 means push and pop for each CSR.
return 2;
}
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index c84e0f441a459..fb940aaa8ab71 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1281,3 +1281,13 @@ bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const {
return true;
}
}
+
+unsigned X86RegisterInfo::getCSRFirstUseCost(const MachineFunction &MF) const {
+ // If PPX is implemented, push/pop pairs doesn't access memory.
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ if (ST.is64Bit() && ST.hasPPX())
+ return 0;
+
+ // push + pop.
+ return 2;
+}
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 134089ed91f8d..1418e2892768a 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -183,10 +183,7 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
return true;
}
- unsigned getCSRFirstUseCost() const override {
- // push + pop.
- return 2;
- }
+ unsigned getCSRFirstUseCost(const MachineFunction &MF) const override;
};
} // End llvm namespace
>From 4951a609f9d30dfa5ba91b1ca1c59a38c41588c2 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Fri, 24 Apr 2026 15:02:06 -0700
Subject: [PATCH 3/4] Fix the new test case.
---
llvm/test/CodeGen/X86/apx/pr191368.ll | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/llvm/test/CodeGen/X86/apx/pr191368.ll b/llvm/test/CodeGen/X86/apx/pr191368.ll
index 20f3dae254598..62fc0dc18a447 100644
--- a/llvm/test/CodeGen/X86/apx/pr191368.ll
+++ b/llvm/test/CodeGen/X86/apx/pr191368.ll
@@ -276,13 +276,12 @@ define i32 @pr190962(ptr %a, ptr %b, ptr %c, i64 %d, i64 %e, i64 %f) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rsi
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: movq %r9, %rdi
; CHECK-NEXT: movq %rdx, %rbx
; CHECK-NEXT: movq %rcx, %rsi
@@ -292,19 +291,20 @@ define i32 @pr190962(ptr %a, ptr %b, ptr %c, i64 %d, i64 %e, i64 %f) nounwind {
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: je .LBB3_3
; CHECK-NEXT: # %bb.1: # %l1
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r13
-; CHECK-NEXT: movq 0, %r15
-; CHECK-NEXT: callq f2
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r15
; CHECK-NEXT: movq 0, %r12
+; CHECK-NEXT: callq f2
+; CHECK-NEXT: movq 0, %rcx
; CHECK-NEXT: xorl %ebp, %ebp
-; CHECK-NEXT: orq %r13, %r15
+; CHECK-NEXT: orq %r12, %r15
; CHECK-NEXT: jne .LBB3_2
; CHECK-NEXT: # %bb.4: # %l2
-; CHECK-NEXT: movq %rax, %r14
+; CHECK-NEXT: movq %rcx, %r15
; CHECK-NEXT: movq %rbx, %rcx
+; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: callq f4
; CHECK-NEXT: movl $1, %ebp
-; CHECK-NEXT: orq %r12, %r14
+; CHECK-NEXT: orq %r15, %rbx
; CHECK-NEXT: je .LBB3_5
; CHECK-NEXT: .LBB3_2: # %common.ret1.sink.split
; CHECK-NEXT: callq f3
@@ -316,23 +316,21 @@ define i32 @pr190962(ptr %a, ptr %b, ptr %c, i64 %d, i64 %e, i64 %f) nounwind {
; CHECK-NEXT: callq f3
; CHECK-NEXT: .LBB3_3: # %common.ret1
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: addq $64, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: popq %rdi
; CHECK-NEXT: popq %rsi
; CHECK-NEXT: popq %r12
-; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB3_5: # %l3
; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: je .LBB3_3
; CHECK-NEXT: # %bb.6: # %l4
; CHECK-NEXT: movl $0, (%rsi)
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %r14, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: xorl %ecx, %ecx
>From 90799b047f11cdd2f41a63ecdc5f0e3a685ed454 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Thu, 14 May 2026 20:40:54 +0000
Subject: [PATCH 4/4] Fix comment.
---
llvm/lib/Target/X86/X86RegisterInfo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index fb940aaa8ab71..3bc85ef072fb7 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1283,7 +1283,7 @@ bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const {
}
unsigned X86RegisterInfo::getCSRFirstUseCost(const MachineFunction &MF) const {
- // If PPX is implemented, push/pop pairs doesn't access memory.
+ // If PPX is implemented, push/pop pairs don't access memory.
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
if (ST.is64Bit() && ST.hasPPX())
return 0;
More information about the cfe-commits
mailing list