[llvm] [X86] Stop emitting CFI instructions on i386-windows (PR #135648)

Fri Mar 6 20:54:10 PST 2026

https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/135648

>From 94600902a04188b8138f8789aec258f74af44255 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Mon, 14 Apr 2025 20:04:04 +0300
Subject: [PATCH] [X86] Stop emitting CFI instructions on i386-windows

`X86FrameLowering` normally emits CFI instructions when `needsDwarfCFI`
returns true. Before this patch it was assumed that this method returns
true on non-Windows target, but it also returns true on Windows i386,
which results in erroneous generation of CFI instructions on that
platform.

This behavior cannot be observed in the generated assembly because
AsmPrinter suppresses printing of these instructions for WinEH exception
model. I'm going to change this: the idea is that if a target has
created a CFI instruction, it should be printed. If it should not
be printed, it should not have been created in the first place.

There was a couple of places where `needsDwarfCFI` wasn't used, also
resulting in erroneous generation of CFI instruction. Now fixed as well.

The changes in tests seem to be caused by `SlotIndexes` assigning
different numbers to instructions, which affects live range lengths
and consequently the register allocator heuristics.
---
 .../Target/X86/X86CallFrameOptimization.cpp   |  2 +-
 llvm/lib/Target/X86/X86FrameLowering.cpp      |  9 ++-
 llvm/lib/Target/X86/X86FrameLowering.h        |  4 +-
 llvm/test/CodeGen/MIR/X86/diexpr-win32.mir    |  2 -
 llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll  | 14 ++--
 llvm/test/CodeGen/X86/andnot-patterns.ll      | 20 ++---
 .../test/CodeGen/X86/fp128-libcalls-strict.ll | 12 +--
 llvm/test/CodeGen/X86/optimize-max-0.ll       |  6 +-
 llvm/test/CodeGen/X86/sbb-false-dep.ll        | 22 +++---
 llvm/test/CodeGen/X86/sdiv_fix.ll             | 74 +++++++++----------
 10 files changed, 81 insertions(+), 84 deletions(-)

diff --git a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index 50f52e1c7a795..5cc7f5a1f331d 100644
--- a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -573,7 +573,7 @@ void X86CallFrameOptimizationImpl::adjustCallSequence(
     // For debugging, when using SP-based CFA, we need to adjust the CFA
     // offset after each push.
     // TODO: This is needed only if we require precise CFA.
-    if (!TFL->hasFP(MF))
+    if (TFL->needsDwarfCFI(MF) && !TFL->hasFP(MF))
       TFL->BuildCFI(
           MBB, std::next(Push), DL,
           MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 8bca6344d6521..2849a81c9f262 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1468,7 +1468,9 @@ bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
 }
 
 bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
-  return !isWin64Prologue(MF) && MF.needsFrameMoves();
+  return MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() !=
+             ExceptionHandling::WinEH &&
+         MF.needsFrameMoves();
 }
 
 /// Return true if an opcode is part of the REP group of instructions
@@ -3815,8 +3817,7 @@ MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
     Amount = alignTo(Amount, getStackAlign());
 
     const Function &F = MF.getFunction();
-    bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
-    bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
+    bool DwarfCFI = needsDwarfCFI(MF);
 
     // If we have any exception handlers in this function, and we adjust
     // the SP before calls, we may need to indicate this to the unwinder
@@ -3825,7 +3826,7 @@ MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
     // GNU_ARGS_SIZE.
     // TODO: We don't need to reset this between subsequent functions,
     // if it didn't change.
-    bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
+    bool HasDwarfEHHandlers = DwarfCFI && !MF.getLandingPads().empty();
 
     if (HasDwarfEHHandlers && !isDestroy &&
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index f1e3796f5fddd..6c6adc6cc035d 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -238,14 +238,14 @@ class X86FrameLowering : public TargetFrameLowering {
   /// frame of the top of stack function) as part of it's ABI.
   bool has128ByteRedZone(const MachineFunction& MF) const;
 
+  bool needsDwarfCFI(const MachineFunction &MF) const;
+
 protected:
   bool hasFPImpl(const MachineFunction &MF) const override;
 
 private:
   bool isWin64Prologue(const MachineFunction &MF) const;
 
-  bool needsDwarfCFI(const MachineFunction &MF) const;
-
   uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
 
   /// Emit target stack probe as a call to a helper function
diff --git a/llvm/test/CodeGen/MIR/X86/diexpr-win32.mir b/llvm/test/CodeGen/MIR/X86/diexpr-win32.mir
index 054c34996d03e..10eb277657468 100644
--- a/llvm/test/CodeGen/MIR/X86/diexpr-win32.mir
+++ b/llvm/test/CodeGen/MIR/X86/diexpr-win32.mir
@@ -190,8 +190,6 @@ body:             |
     liveins: $esi
 
     frame-setup PUSH32r killed $esi, implicit-def $esp, implicit $esp
-    CFI_INSTRUCTION def_cfa_offset 8
-    CFI_INSTRUCTION offset $esi, -8
     $esi = MOV32rm $esp, 1, _, 8, _ :: (load (s32) from %fixed-stack.2)
     DBG_VALUE $esp, 0, !26, !10, debug-location !25
     DBG_VALUE $esp, 0, !23, !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref), debug-location !25
diff --git a/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index b32afdc2214e0..a3a88bd07e65c 100644
--- a/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -28,20 +28,20 @@ define i16 @SQLDriversW(ptr %henv, i16 zeroext  %fDir, ptr %szDrvDesc, i16 signe
 ; CHECK-NEXT:  ## %bb.4: ## %bb37
 ; CHECK-NEXT:    movw $0, 40(%edi)
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    leal (,%ecx,4), %ecx
-; CHECK-NEXT:    leal (,%ebx,4), %edx
+; CHECK-NEXT:    leal (,%ecx,4), %eax
+; CHECK-NEXT:    leal (,%ebx,4), %ecx
 ; CHECK-NEXT:    subl $12, %esp
-; CHECK-NEXT:    movzwl %bp, %eax
+; CHECK-NEXT:    movzwl %bp, %edx
+; CHECK-NEXT:    cwtl
 ; CHECK-NEXT:    movswl %cx, %ecx
-; CHECK-NEXT:    movswl %dx, %edx
 ; CHECK-NEXT:    pushl $87
 ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-NEXT:    pushl %ecx
+; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    pushl $0
 ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-NEXT:    pushl %edx
+; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    pushl $0
-; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    pushl %edx
 ; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    calll _SQLDrivers_Internal
 ; CHECK-NEXT:    addl $48, %esp
diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll
index fc573fbd4fc99..370f86dad0427 100644
--- a/llvm/test/CodeGen/X86/andnot-patterns.ll
+++ b/llvm/test/CodeGen/X86/andnot-patterns.ll
@@ -198,28 +198,28 @@ define i64 @andnot_rotl_i64_multiuse_rot(i64 %a0, i64 %a1, i64 %a2) nounwind {
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    notl %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    notl %eax
 ; X86-NEXT:    notl %esi
 ; X86-NEXT:    testb $32, %cl
 ; X86-NEXT:    jne .LBB4_1
 ; X86-NEXT:  # %bb.2:
-; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %esi, %edx
 ; X86-NEXT:    jmp .LBB4_3
 ; X86-NEXT:  .LBB4_1:
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:  .LBB4_3:
-; X86-NEXT:    movl %edx, %ebx
-; X86-NEXT:    shldl %cl, %eax, %ebx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    shldl %cl, %edx, %ebx
 ; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    shldl %cl, %eax, %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    andl %eax, %esi
+; X86-NEXT:    andl %edx, %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    andl %ebx, %edi
 ; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %edx
 ; X86-NEXT:    calll use_i64 at PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    movl %esi, %eax
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index ad2d690fd7ed0..3871269a1c683 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -3872,28 +3872,28 @@ define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll __eqtf2
 ; X86-NEXT:    addl $32, %esp
 ; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    sete %bl
+; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll __unordtf2
 ; X86-NEXT:    addl $32, %esp
@@ -4093,28 +4093,28 @@ define i64 @cmp_one_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll __eqtf2
 ; X86-NEXT:    addl $32, %esp
 ; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    setne %bl
+; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll __unordtf2
 ; X86-NEXT:    addl $32, %esp
diff --git a/llvm/test/CodeGen/X86/optimize-max-0.ll b/llvm/test/CodeGen/X86/optimize-max-0.ll
index b6af7e1641a9c..7921cb578a9da 100644
--- a/llvm/test/CodeGen/X86/optimize-max-0.ll
+++ b/llvm/test/CodeGen/X86/optimize-max-0.ll
@@ -560,14 +560,14 @@ define void @bar(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind {
 ; CHECK-NEXT:    cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
 ; CHECK-NEXT:    jb LBB1_9
 ; CHECK-NEXT:  LBB1_13: ## %bb20
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-NEXT:    cmpl $1, %esi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    cmpl $1, %ebx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    je LBB1_19
 ; CHECK-NEXT:  ## %bb.14: ## %bb20
-; CHECK-NEXT:    cmpl $3, %esi
+; CHECK-NEXT:    cmpl $3, %ebx
 ; CHECK-NEXT:    jne LBB1_24
 ; CHECK-NEXT:  ## %bb.15: ## %bb22
 ; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/sbb-false-dep.ll b/llvm/test/CodeGen/X86/sbb-false-dep.ll
index 34a92cb58692b..d84b9856b8d82 100644
--- a/llvm/test/CodeGen/X86/sbb-false-dep.ll
+++ b/llvm/test/CodeGen/X86/sbb-false-dep.ll
@@ -58,34 +58,34 @@ define i32 @mallocbench_gs(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 n
 ; IDIOM-NEXT:    pushq %r14
 ; IDIOM-NEXT:    pushq %r12
 ; IDIOM-NEXT:    pushq %rbx
-; IDIOM-NEXT:    movl %r8d, %ebp
-; IDIOM-NEXT:    movl %ecx, %r14d
-; IDIOM-NEXT:    movl %edx, %r15d
-; IDIOM-NEXT:    movq %rsi, %rbx
+; IDIOM-NEXT:    movl %r8d, %ebx
+; IDIOM-NEXT:    movl %ecx, %ebp
+; IDIOM-NEXT:    movl %edx, %r14d
+; IDIOM-NEXT:    movq %rsi, %r15
 ; IDIOM-NEXT:    movq %rdi, %r12
 ; IDIOM-NEXT:    movq (%rsi), %rdi
 ; IDIOM-NEXT:    movq 8(%rsi), %rsi
-; IDIOM-NEXT:    movq %rbx, %rdx
+; IDIOM-NEXT:    movq %r15, %rdx
 ; IDIOM-NEXT:    callq foo1 at PLT
-; IDIOM-NEXT:    movq 8(%rbx), %rax
+; IDIOM-NEXT:    movq 8(%r15), %rax
 ; IDIOM-NEXT:    movq (%rax), %rax
-; IDIOM-NEXT:    movl %ebp, %ecx
+; IDIOM-NEXT:    movl %ebx, %ecx
 ; IDIOM-NEXT:    negl %ecx
 ; IDIOM-NEXT:    sbbq %r10, %r10
 ; IDIOM-NEXT:    orq %rax, %r10
-; IDIOM-NEXT:    cmpl $1, %ebp
+; IDIOM-NEXT:    cmpl $1, %ebx
 ; IDIOM-NEXT:    sbbq %r11, %r11
 ; IDIOM-NEXT:    orq %rax, %r11
 ; IDIOM-NEXT:    subq $8, %rsp
 ; IDIOM-NEXT:    movq %r12, %rdi
-; IDIOM-NEXT:    movl %r15d, %esi
-; IDIOM-NEXT:    movl %r14d, %edx
+; IDIOM-NEXT:    movl %r14d, %esi
+; IDIOM-NEXT:    movl %ebp, %edx
 ; IDIOM-NEXT:    xorl %ecx, %ecx
 ; IDIOM-NEXT:    xorl %r8d, %r8d
 ; IDIOM-NEXT:    xorl %r9d, %r9d
 ; IDIOM-NEXT:    pushq %r11
 ; IDIOM-NEXT:    pushq %r10
-; IDIOM-NEXT:    pushq %rbx
+; IDIOM-NEXT:    pushq %r15
 ; IDIOM-NEXT:    callq foo2 at PLT
 ; IDIOM-NEXT:    addq $32, %rsp
 ; IDIOM-NEXT:    popq %rbx
diff --git a/llvm/test/CodeGen/X86/sdiv_fix.ll b/llvm/test/CodeGen/X86/sdiv_fix.ll
index 392bc83d9d5d8..e02cdc1556f9d 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix.ll
@@ -557,20 +557,19 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    calll __divdi3
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edi, %eax
-; X86-NEXT:    sarl $31, %eax
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl %ebx, %ebp
 ; X86-NEXT:    shll $31, %ebp
-; X86-NEXT:    movl %ebx, %ecx
-; X86-NEXT:    shrl $31, %ecx
-; X86-NEXT:    shldl $31, %ebx, %ecx
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    shldl $31, %ebx, %eax
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl %eax, %ebx
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %ecx
-; X86-NEXT:    movl %ecx, %ebx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    calll __moddi3
 ; X86-NEXT:    addl $16, %esp
@@ -584,58 +583,57 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    sarl $31, %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %ebx
 ; X86-NEXT:    shll $31, %ebx
-; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %ecx, %edi
 ; X86-NEXT:    shrl $31, %edi
-; X86-NEXT:    shldl $31, %edx, %edi
-; X86-NEXT:    pushl %ecx
-; X86-NEXT:    movl %ecx, %ebp
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $31, %ecx, %edi
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl %eax, %ebp
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    calll __moddi3
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    calll __divdi3
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    sarl $31, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    sarl $31, %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl %ecx, %esi
 ; X86-NEXT:    shll $31, %esi
-; X86-NEXT:    movl %ecx, %ebp
-; X86-NEXT:    shrl $31, %ebp
-; X86-NEXT:    shldl $31, %ecx, %ebp
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    shrl $31, %ebx
+; X86-NEXT:    shldl $31, %ecx, %ebx
 ; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    calll __moddi3
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
 ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    calll __divdi3
 ; X86-NEXT:    addl $16, %esp
-; X86-NEXT:    testl %ebp, %ebp
-; X86-NEXT:    sets %cl
 ; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    sets %cl
+; X86-NEXT:    testl %ebp, %ebp
 ; X86-NEXT:    sets %dl
 ; X86-NEXT:    xorb %cl, %dl
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -668,8 +666,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    testb %cl, %al
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    leal -1(%eax), %ebp
-; X86-NEXT:    cmovel %eax, %ebp
+; X86-NEXT:    leal -1(%eax), %esi
+; X86-NEXT:    cmovel %eax, %esi
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-NEXT:    testl %edx, %edx
 ; X86-NEXT:    sets %al
@@ -678,7 +676,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    sets %bl
 ; X86-NEXT:    xorb %al, %bl
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT:    leal -1(%edi), %esi
+; X86-NEXT:    leal -1(%edi), %ebp
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl %edx
@@ -688,10 +686,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    orl %eax, %edx
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    testb %bl, %al
-; X86-NEXT:    cmovel %edi, %esi
+; X86-NEXT:    cmovel %edi, %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %esi, 12(%eax)
-; X86-NEXT:    movl %ebp, 8(%eax)
+; X86-NEXT:    movl %ebp, 12(%eax)
+; X86-NEXT:    movl %esi, 8(%eax)
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    movl %ecx, 4(%eax)
 ; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload