[llvm] [RegisterPressure] Remove dead defs correctly (PR #156576)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 2 20:25:31 PDT 2025


https://github.com/XChy created https://github.com/llvm/llvm-project/pull/156576

Fixes https://github.com/llvm/llvm-project/issues/134424.
IIUC, when collecting operands we should remove the dead registers from `RegOpers.Defs`, not from `RegOpers.DeadDefs`.
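
To illustrate the direction of the pruning, here is a minimal standalone sketch. It assumes a deliberately simplified register-unit model (plain `unsigned` units instead of LLVM's `VRegMaskOrUnit` entries with lane masks), so `removeUnit` is only a stand-in for `removeRegLanes`:

```cpp
#include <algorithm>
#include <vector>

// Hypothetical simplification: physreg defs are tracked as register units,
// so a dead def of rax covers every unit of its sub-register eax.
using RegUnit = unsigned;

static void removeUnit(std::vector<RegUnit> &Units, RegUnit U) {
  Units.erase(std::remove(Units.begin(), Units.end(), U), Units.end());
}

int main() {
  std::vector<RegUnit> Defs = {0};        // def of eax -> unit 0
  std::vector<RegUnit> DeadDefs = {0, 1}; // dead def of rax -> units 0 and 1

  // Fixed direction: prune Defs by DeadDefs so eax does not survive as a
  // seemingly live def. The old code pruned DeadDefs by Defs instead,
  // which left the stale eax entry in Defs.
  for (RegUnit U : DeadDefs)
    removeUnit(Defs, U);

  // Defs is now empty: nothing downstream sees a live def of eax.
  return 0;
}
```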

In the testcase, the LiveVariables analysis turns `inlineasm rax, eax` into `inlineasm dead rax, eax` rather than `inlineasm dead rax, dead eax`. But `eax` is in fact dead as well, since its super-register `rax` is dead. RegisterOperandsCollector nevertheless still treated `eax` as a live register. The fix is to remove `eax` from `Defs`, rather than removing `eax` from `DeadDefs`.
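
Roughly, the relevant instruction after LiveVariables looks like this (a hand-written approximation of the MIR, not output captured from the PR):

```
INLINEASM &"" [sideeffect], implicit-def dead early-clobber $rax, implicit-def early-clobber $eax
```

Only `$rax` carries the `dead` flag, even though every lane of `$eax` is covered by the dead `$rax` def.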

This was painful to debug; it is hard to pin down exactly how the assertion ends up being triggered, and I'm sorry I cannot walk through the entire crash process here.

From fc4665ecd22171b1c29facb22a0c6fd1b0cbc929 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 3 Sep 2025 02:16:19 +0800
Subject: [PATCH] [RegisterPressure] Remove dead defs correctly

---
 llvm/lib/CodeGen/RegisterPressure.cpp         |   8 +-
 .../div-rem-pair-recomposition-unsigned.ll    | 146 ++++++++++--------
 .../CodeGen/X86/inline-asm-clobber-subreg.ll  |  18 +++
 llvm/test/CodeGen/X86/scalar_widen_div.ll     |  10 +-
 llvm/test/CodeGen/X86/ssub_sat_plus.ll        |   4 +-
 llvm/test/CodeGen/X86/udiv_fix.ll             |  32 ++--
 llvm/test/CodeGen/X86/udiv_fix_sat.ll         |  82 +++++-----
 llvm/test/CodeGen/X86/usub_sat_plus.ll        |   4 +-
 llvm/test/CodeGen/X86/xmulo.ll                |  12 +-
 9 files changed, 175 insertions(+), 141 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll

diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index 5f3789050b813..e43e459f4f247 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -477,8 +477,8 @@ class RegisterOperandsCollector {
       collectOperand(*OperI);
 
     // Remove redundant physreg dead defs.
-    for (const VRegMaskOrUnit &P : RegOpers.Defs)
-      removeRegLanes(RegOpers.DeadDefs, P);
+    for (const VRegMaskOrUnit &P : RegOpers.DeadDefs)
+      removeRegLanes(RegOpers.Defs, P);
   }
 
   void collectInstrLanes(const MachineInstr &MI) const {
@@ -486,8 +486,8 @@ class RegisterOperandsCollector {
       collectOperandLanes(*OperI);
 
     // Remove redundant physreg dead defs.
-    for (const VRegMaskOrUnit &P : RegOpers.Defs)
-      removeRegLanes(RegOpers.DeadDefs, P);
+    for (const VRegMaskOrUnit &P : RegOpers.DeadDefs)
+      removeRegLanes(RegOpers.Defs, P);
   }
 
   /// Push this operand's register onto the correct vectors.
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 859e9244d29d2..5c8784cf19338 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -776,145 +776,157 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, ptr %divdst) nounwi
 define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y, ptr %divdst) nounwind {
 ; X86-LABEL: vector_i128_i16:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pextrw $7, %xmm0, %eax
-; X86-NEXT:    pextrw $7, %xmm1, %esi
+; X86-NEXT:    pextrw $7, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm2
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %esi
 ; X86-NEXT:    pextrw $6, %xmm0, %eax
-; X86-NEXT:    pextrw $6, %xmm1, %esi
+; X86-NEXT:    pextrw $6, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
+; X86-NEXT:    divw %cx
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm3
-; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
 ; X86-NEXT:    pextrw $5, %xmm0, %eax
-; X86-NEXT:    pextrw $5, %xmm1, %esi
+; X86-NEXT:    pextrw $5, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm4
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %edi
 ; X86-NEXT:    pextrw $4, %xmm0, %eax
-; X86-NEXT:    pextrw $4, %xmm1, %esi
+; X86-NEXT:    pextrw $4, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm2
-; X86-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %ebx
 ; X86-NEXT:    pextrw $3, %xmm0, %eax
-; X86-NEXT:    pextrw $3, %xmm1, %esi
+; X86-NEXT:    pextrw $3, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm4
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %ebp
 ; X86-NEXT:    pextrw $2, %xmm0, %eax
-; X86-NEXT:    pextrw $2, %xmm1, %esi
+; X86-NEXT:    pextrw $2, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm3
-; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movd %esi, %xmm3
 ; X86-NEXT:    pextrw $1, %xmm0, %eax
 ; X86-NEXT:    pextrw $1, %xmm1, %esi
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    divw %si
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
+; X86-NEXT:    movd (%esp), %xmm4 # 4-byte Folded Reload
+; X86-NEXT:    # xmm4 = mem[0],zero,zero,zero
+; X86-NEXT:    movd %edi, %xmm5
+; X86-NEXT:    movd %ebx, %xmm2
+; X86-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; X86-NEXT:    movd %ebp, %xmm6
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X86-NEXT:    movd %ecx, %xmm3
+; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
 ; X86-NEXT:    movd %eax, %xmm4
 ; X86-NEXT:    movd %xmm0, %eax
-; X86-NEXT:    movd %xmm1, %esi
+; X86-NEXT:    movd %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
+; X86-NEXT:    divw %cx
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
 ; X86-NEXT:    movd %eax, %xmm5
 ; X86-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
 ; X86-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
 ; X86-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
-; X86-NEXT:    movdqa %xmm5, (%ecx)
+; X86-NEXT:    movdqa %xmm5, (%esi)
 ; X86-NEXT:    pmullw %xmm1, %xmm5
 ; X86-NEXT:    psubw %xmm5, %xmm0
+; X86-NEXT:    addl $4, %esp
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: vector_i128_i16:
 ; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    pextrw $7, %xmm0, %eax
 ; X64-NEXT:    pextrw $7, %xmm1, %ecx
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm2
+; X64-NEXT:    movl %eax, %ecx
 ; X64-NEXT:    pextrw $6, %xmm0, %eax
-; X64-NEXT:    pextrw $6, %xmm1, %ecx
+; X64-NEXT:    pextrw $6, %xmm1, %esi
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm3
-; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    divw %si
+; X64-NEXT:    movl %eax, %esi
 ; X64-NEXT:    pextrw $5, %xmm0, %eax
-; X64-NEXT:    pextrw $5, %xmm1, %ecx
+; X64-NEXT:    pextrw $5, %xmm1, %r8d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm4
+; X64-NEXT:    divw %r8w
+; X64-NEXT:    movl %eax, %r8d
 ; X64-NEXT:    pextrw $4, %xmm0, %eax
-; X64-NEXT:    pextrw $4, %xmm1, %ecx
+; X64-NEXT:    pextrw $4, %xmm1, %r9d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm2
-; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X64-NEXT:    divw %r9w
+; X64-NEXT:    movl %eax, %r9d
 ; X64-NEXT:    pextrw $3, %xmm0, %eax
-; X64-NEXT:    pextrw $3, %xmm1, %ecx
+; X64-NEXT:    pextrw $3, %xmm1, %r10d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm3
+; X64-NEXT:    divw %r10w
+; X64-NEXT:    movl %eax, %r10d
 ; X64-NEXT:    pextrw $2, %xmm0, %eax
-; X64-NEXT:    pextrw $2, %xmm1, %ecx
+; X64-NEXT:    pextrw $2, %xmm1, %r11d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm4
-; X64-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; X64-NEXT:    divw %r11w
+; X64-NEXT:    movl %eax, %r11d
 ; X64-NEXT:    pextrw $1, %xmm0, %eax
-; X64-NEXT:    pextrw $1, %xmm1, %ecx
+; X64-NEXT:    pextrw $1, %xmm1, %ebx
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
+; X64-NEXT:    divw %bx
 ; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm3
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    movd %esi, %xmm3
+; X64-NEXT:    movd %r8d, %xmm4
+; X64-NEXT:    movd %r9d, %xmm5
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    movd %r10d, %xmm2
+; X64-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X64-NEXT:    movd %r11d, %xmm3
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    movd %eax, %xmm2
 ; X64-NEXT:    movd %xmm0, %eax
 ; X64-NEXT:    movd %xmm1, %ecx
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divw %cx
 ; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm5
-; X64-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
-; X64-NEXT:    movdqa %xmm5, (%rdi)
-; X64-NEXT:    pmullw %xmm1, %xmm5
-; X64-NEXT:    psubw %xmm5, %xmm0
+; X64-NEXT:    movd %eax, %xmm4
+; X64-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; X64-NEXT:    movdqa %xmm4, (%rdi)
+; X64-NEXT:    pmullw %xmm1, %xmm4
+; X64-NEXT:    psubw %xmm4, %xmm0
+; X64-NEXT:    popq %rbx
 ; X64-NEXT:    retq
   %div = udiv <8 x i16> %x, %y
   store <8 x i16> %div, ptr %divdst, align 16
diff --git a/llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll b/llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll
new file mode 100644
index 0000000000000..638a8eeb2d472
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s
+
+define i64 @foo(i64 %vecext) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movabsq $705425148255374, %rax # imm = 0x2819497609C8E
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    idivq %rdi
+; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    retq
+entry:
+  tail call void asm sideeffect "", "~{rax},~{eax},~{dirflag},~{fpsr},~{flags}"()
+  %rem = srem i64 705425148255374, %vecext
+  ret i64 %rem
+}
diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll
index 1d98b4f62069d..d50d8ed7aaf6a 100644
--- a/llvm/test/CodeGen/X86/scalar_widen_div.ll
+++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll
@@ -150,15 +150,15 @@ define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divw %si
-; CHECK-NEXT:    # kill: def $ax killed $ax def $eax
-; CHECK-NEXT:    movd %eax, %xmm2
-; CHECK-NEXT:    pinsrw $1, %ecx, %xmm2
+; CHECK-NEXT:    movl %eax, %esi
 ; CHECK-NEXT:    pextrw $2, %xmm0, %eax
-; CHECK-NEXT:    pextrw $2, %xmm1, %ecx
+; CHECK-NEXT:    pextrw $2, %xmm1, %edi
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    divw %cx
+; CHECK-NEXT:    divw %di
 ; CHECK-NEXT:    # kill: def $ax killed $ax def $eax
+; CHECK-NEXT:    movd %esi, %xmm2
+; CHECK-NEXT:    pinsrw $1, %ecx, %xmm2
 ; CHECK-NEXT:    pinsrw $2, %eax, %xmm2
 ; CHECK-NEXT:    pextrw $3, %xmm0, %eax
 ; CHECK-NEXT:    pextrw $3, %xmm1, %ecx
diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
index 5baf7a1dac74c..8b96a8050e65e 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
@@ -105,9 +105,9 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
 define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    xorl %ecx, %ecx
 ; X86-NEXT:    cmpb %al, %dl
 ; X86-NEXT:    setns %cl
@@ -140,9 +140,9 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    sarb $4, %al
 ; X86-NEXT:    subb %al, %cl
diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll
index 82dfeeee13293..f49dec5714ff0 100644
--- a/llvm/test/CodeGen/X86/udiv_fix.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix.ll
@@ -243,28 +243,30 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:    movdqa %xmm1, %xmm3
 ; X64-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
 ; X64-NEXT:    movq %xmm3, %rcx
-; X64-NEXT:    movdqa %xmm0, %xmm4
-; X64-NEXT:    punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; X64-NEXT:    psllq $31, %xmm4
-; X64-NEXT:    movq %xmm4, %rax
+; X64-NEXT:    movdqa %xmm0, %xmm3
+; X64-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    psllq $31, %xmm3
+; X64-NEXT:    movq %xmm3, %rax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm3
-; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
-; X64-NEXT:    movq %xmm4, %rax
-; X64-NEXT:    movdqa %xmm1, %xmm4
-; X64-NEXT:    psrldq {{.*#+}} xmm4 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT:    movq %xmm4, %rcx
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
+; X64-NEXT:    movq %xmm3, %rax
+; X64-NEXT:    movdqa %xmm1, %xmm3
+; X64-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    movq %xmm3, %rsi
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm4
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; X64-NEXT:    divq %rsi
+; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X64-NEXT:    psllq $31, %xmm0
 ; X64-NEXT:    movq %xmm0, %rax
-; X64-NEXT:    movd %xmm1, %ecx
+; X64-NEXT:    movd %xmm1, %edi
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divq %rcx
+; X64-NEXT:    divq %rdi
+; X64-NEXT:    movq %rcx, %xmm3
+; X64-NEXT:    movq %rsi, %xmm2
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
 ; X64-NEXT:    movq %rax, %xmm2
 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; X64-NEXT:    movq %xmm0, %rax
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index 3da5973f9f903..ad95deabb2e55 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -320,59 +320,61 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:    movq %xmm4, %rcx
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm8
+; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
 ; X64-NEXT:    movq %xmm3, %rax
 ; X64-NEXT:    movdqa %xmm1, %xmm3
 ; X64-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT:    movq %xmm3, %rcx
+; X64-NEXT:    movq %xmm3, %rsi
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm3
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm3[0]
-; X64-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
-; X64-NEXT:    movdqa %xmm8, %xmm3
-; X64-NEXT:    pxor %xmm4, %xmm3
-; X64-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[1,1,3,3]
-; X64-NEXT:    movdqa {{.*#+}} xmm7 = [2147483649,2147483649,2147483649,2147483649]
-; X64-NEXT:    pcmpeqd %xmm7, %xmm6
-; X64-NEXT:    movdqa {{.*#+}} xmm5 = [9223372043297226751,9223372043297226751]
-; X64-NEXT:    movdqa %xmm5, %xmm9
-; X64-NEXT:    pcmpgtd %xmm3, %xmm9
-; X64-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
-; X64-NEXT:    pand %xmm6, %xmm10
-; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
-; X64-NEXT:    por %xmm10, %xmm3
-; X64-NEXT:    pcmpeqd %xmm6, %xmm6
-; X64-NEXT:    pand %xmm3, %xmm8
-; X64-NEXT:    pxor %xmm6, %xmm3
-; X64-NEXT:    por %xmm8, %xmm3
-; X64-NEXT:    psrlq $1, %xmm3
+; X64-NEXT:    divq %rsi
+; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
 ; X64-NEXT:    movq %xmm2, %rax
-; X64-NEXT:    movd %xmm1, %ecx
+; X64-NEXT:    movd %xmm1, %edi
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm8
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
-; X64-NEXT:    movq %xmm0, %rax
+; X64-NEXT:    divq %rdi
+; X64-NEXT:    movq %rcx, %xmm7
+; X64-NEXT:    movq %rsi, %xmm0
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm0[0]
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
+; X64-NEXT:    movdqa %xmm7, %xmm3
+; X64-NEXT:    pxor %xmm0, %xmm3
+; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; X64-NEXT:    movdqa {{.*#+}} xmm6 = [2147483649,2147483649,2147483649,2147483649]
+; X64-NEXT:    pcmpeqd %xmm6, %xmm4
+; X64-NEXT:    movdqa {{.*#+}} xmm5 = [9223372043297226751,9223372043297226751]
+; X64-NEXT:    movdqa %xmm5, %xmm8
+; X64-NEXT:    pcmpgtd %xmm3, %xmm8
+; X64-NEXT:    pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2]
+; X64-NEXT:    pand %xmm4, %xmm9
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm8[1,1,3,3]
+; X64-NEXT:    por %xmm9, %xmm3
+; X64-NEXT:    pcmpeqd %xmm4, %xmm4
+; X64-NEXT:    pand %xmm3, %xmm7
+; X64-NEXT:    pxor %xmm4, %xmm3
+; X64-NEXT:    por %xmm7, %xmm3
+; X64-NEXT:    psrlq $1, %xmm3
+; X64-NEXT:    movq %rax, %xmm7
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; X64-NEXT:    movq %xmm2, %rax
 ; X64-NEXT:    psrlq $32, %xmm1
 ; X64-NEXT:    movq %xmm1, %rcx
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm0
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm0[0]
-; X64-NEXT:    pxor %xmm8, %xmm4
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
-; X64-NEXT:    pcmpeqd %xmm7, %xmm0
-; X64-NEXT:    pcmpgtd %xmm4, %xmm5
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
-; X64-NEXT:    pand %xmm0, %xmm1
+; X64-NEXT:    movq %rax, %xmm1
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm1[0]
+; X64-NEXT:    pxor %xmm7, %xmm0
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; X64-NEXT:    pcmpeqd %xmm6, %xmm1
+; X64-NEXT:    pcmpgtd %xmm0, %xmm5
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[0,0,2,2]
+; X64-NEXT:    pand %xmm1, %xmm2
 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
-; X64-NEXT:    por %xmm1, %xmm0
-; X64-NEXT:    pxor %xmm0, %xmm6
-; X64-NEXT:    pand %xmm8, %xmm0
-; X64-NEXT:    por %xmm6, %xmm0
+; X64-NEXT:    por %xmm2, %xmm0
+; X64-NEXT:    pxor %xmm0, %xmm4
+; X64-NEXT:    pand %xmm7, %xmm0
+; X64-NEXT:    por %xmm4, %xmm0
 ; X64-NEXT:    psrlq $1, %xmm0
 ; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/usub_sat_plus.ll b/llvm/test/CodeGen/X86/usub_sat_plus.ll
index 0fb14ad5cf7b0..725de4401cb87 100644
--- a/llvm/test/CodeGen/X86/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_plus.ll
@@ -82,9 +82,9 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw
 define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    subb %al, %cl
 ; X86-NEXT:    movzbl %cl, %eax
@@ -111,9 +111,9 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
 define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    andb $15, %al
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    subb %al, %cl
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index a076d0d762aa3..657547800075c 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -90,9 +90,9 @@ define zeroext i1 @smuloi8(i8 %v1, i8 %v2, ptr %res) {
 ;
 ; WIN32-LABEL: smuloi8:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
@@ -303,9 +303,9 @@ define zeroext i1 @umuloi8(i8 %v1, i8 %v2, ptr %res) {
 ;
 ; WIN32-LABEL: umuloi8:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
@@ -1386,10 +1386,10 @@ define zeroext i1 @smuloi8_load(ptr %ptr1, i8 %v2, ptr %res) {
 ;
 ; WIN32-LABEL: smuloi8_load:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    movzbl (%eax), %eax
 ; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
@@ -1435,10 +1435,10 @@ define zeroext i1 @smuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) {
 ;
 ; WIN32-LABEL: smuloi8_load2:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; WIN32-NEXT:    imulb (%ecx)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
@@ -1829,10 +1829,10 @@ define zeroext i1 @umuloi8_load(ptr %ptr1, i8 %v2, ptr %res) {
 ;
 ; WIN32-LABEL: umuloi8_load:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    movzbl (%eax), %eax
 ; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
@@ -1878,10 +1878,10 @@ define zeroext i1 @umuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) {
 ;
 ; WIN32-LABEL: umuloi8_load2:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; WIN32-NEXT:    mulb (%ecx)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
