[llvm] 2826869 - [DAG] Do not combine any_ext when we combine and into zext.

Amaury Séchet via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 19 04:37:12 PST 2023


Author: Amaury Séchet
Date: 2023-01-19T12:37:05Z
New Revision: 2826869d7b3119f96b973f5cd15128162334f1fe

URL: https://github.com/llvm/llvm-project/commit/2826869d7b3119f96b973f5cd15128162334f1fe
DIFF: https://github.com/llvm/llvm-project/commit/2826869d7b3119f96b973f5cd15128162334f1fe.diff

LOG: [DAG] Do not combine any_ext when we combine and into zext.

This transform loses information that can be useful for other transforms; an illustrative sketch of the fold follows below.

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D141883
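
For readers skimming the diff: the fold in question rewrites (and (any_extend V), C) into (zero_extend V) whenever every bit cleared by C is already known to be zero in V. Before this patch the combiner then replaced *all* uses of the any_extend with the new zero_extend; now only the AND itself is rewritten, so other users of the any_extend keep its unconstrained high bits for later combines to exploit. The standalone program below is an illustrative sketch only, not part of the patch; it assumes a recent LLVM's APInt and walks through the mask test that guards the fold:

    // Sketch: why (and (any_extend t0), C) can become (zero_extend t0).
    // Example: t0 : i16 is any_extended to i32, then masked with
    // C = 0x0000FFFF. The AND clears exactly the bits the any_extend
    // left undefined, so the result is (zero_extend t0).
    // Build (assumed): clang++ $(llvm-config --cxxflags --ldflags) \
    //                  sketch.cpp -lLLVMSupport
    #include "llvm/ADT/APInt.h"
    #include <cassert>

    int main() {
      llvm::APInt C(32, 0xFFFF);  // the AND mask, in the wide type
      llvm::APInt Mask = ~C;      // bits the AND clears: 0xFFFF0000
      Mask = Mask.trunc(16);      // restricted to the narrow source: 0
      // DAG.MaskedValueIsZero(t0, Mask) asks whether t0 could have a bit
      // set under Mask; with Mask == 0 that holds trivially, so the AND
      // only clears extension bits and the fold fires.
      assert(Mask.isZero());
      return 0;
    }

The removed CombineTo(N0.getNode(), Zext) call was the part that rewrote unrelated users of the any_extend; dropping it is what preserves the information, at the cost of the test churn below.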

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
    llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
    llvm/test/CodeGen/X86/known-signbits-vector.ll
    llvm/test/CodeGen/X86/ushl_sat.ll
    llvm/test/CodeGen/X86/ushl_sat_vec.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 49d6eb9732059..36c077ae2f12c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6342,19 +6342,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     SDValue N0Op0 = N0.getOperand(0);
     APInt Mask = ~N1C->getAPIntValue();
     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
-    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
-      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
-                                 N0.getValueType(), N0Op0);
-
-      // Replace uses of the AND with uses of the Zero extend node.
-      CombineTo(N, Zext);
-
-      // We actually want to replace all uses of the any_extend with the
-      // zero_extend, to avoid duplicating things.  This will later cause this
-      // AND to be folded.
-      CombineTo(N0.getNode(), Zext);
-      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
-    }
+    if (DAG.MaskedValueIsZero(N0Op0, Mask))
+      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0);
   }
 
   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->

diff --git a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index 4d6d5edd16510..45d74ea5b196c 100644
--- a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -800,52 +800,52 @@ define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    pushq %rbx
 ; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
 ; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi
-; AVX-NEXT:    movq %rdx, %rax
-; AVX-NEXT:    shrq $56, %rax
+; AVX-NEXT:    movq %rcx, %rax
+; AVX-NEXT:    shrq $48, %rax
 ; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    movq %rdx, %rcx
-; AVX-NEXT:    shrq $48, %rcx
-; AVX-NEXT:    andl $15, %ecx
-; AVX-NEXT:    movq %rdx, %rsi
-; AVX-NEXT:    shrq $40, %rsi
+; AVX-NEXT:    movq %rcx, %rdx
+; AVX-NEXT:    shrq $40, %rdx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    movq %rcx, %rsi
+; AVX-NEXT:    shrq $32, %rsi
 ; AVX-NEXT:    andl $15, %esi
-; AVX-NEXT:    movq %rdx, %r8
-; AVX-NEXT:    shrq $32, %r8
-; AVX-NEXT:    andl $15, %r8d
 ; AVX-NEXT:    movq %rdi, %r9
-; AVX-NEXT:    shrq $56, %r9
+; AVX-NEXT:    shrq $48, %r9
 ; AVX-NEXT:    andl $15, %r9d
 ; AVX-NEXT:    movq %rdi, %r10
-; AVX-NEXT:    shrq $48, %r10
+; AVX-NEXT:    shrq $40, %r10
 ; AVX-NEXT:    andl $15, %r10d
 ; AVX-NEXT:    movq %rdi, %r11
-; AVX-NEXT:    shrq $40, %r11
+; AVX-NEXT:    shrq $32, %r11
 ; AVX-NEXT:    andl $15, %r11d
+; AVX-NEXT:    movq %rcx, %r8
+; AVX-NEXT:    shrq $56, %r8
+; AVX-NEXT:    andl $15, %r8d
 ; AVX-NEXT:    movq %rdi, %rbx
-; AVX-NEXT:    shrq $32, %rbx
+; AVX-NEXT:    shrq $56, %rbx
 ; AVX-NEXT:    andl $15, %ebx
-; AVX-NEXT:    shlq $32, %rbx
+; AVX-NEXT:    shlq $32, %r11
 ; AVX-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; AVX-NEXT:    orq %rbx, %rdi
-; AVX-NEXT:    shlq $40, %r11
-; AVX-NEXT:    orq %rdi, %r11
-; AVX-NEXT:    shlq $48, %r10
-; AVX-NEXT:    orq %r11, %r10
-; AVX-NEXT:    shlq $56, %r9
+; AVX-NEXT:    orq %r11, %rdi
+; AVX-NEXT:    shlq $40, %r10
+; AVX-NEXT:    orq %rdi, %r10
+; AVX-NEXT:    shlq $48, %r9
 ; AVX-NEXT:    orq %r10, %r9
-; AVX-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    shlq $32, %r8
-; AVX-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; AVX-NEXT:    orq %r8, %rdx
-; AVX-NEXT:    shlq $40, %rsi
-; AVX-NEXT:    orq %rdx, %rsi
-; AVX-NEXT:    shlq $48, %rcx
+; AVX-NEXT:    shlq $56, %rbx
+; AVX-NEXT:    orq %r9, %rbx
+; AVX-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    shlq $32, %rsi
+; AVX-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
 ; AVX-NEXT:    orq %rsi, %rcx
-; AVX-NEXT:    shlq $56, %rax
-; AVX-NEXT:    orq %rcx, %rax
-; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    shlq $40, %rdx
+; AVX-NEXT:    orq %rcx, %rdx
+; AVX-NEXT:    shlq $48, %rax
+; AVX-NEXT:    orq %rdx, %rax
+; AVX-NEXT:    shlq $56, %r8
+; AVX-NEXT:    orq %rax, %r8
+; AVX-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
 ; AVX-NEXT:    popq %rbx
 ; AVX-NEXT:    retq
@@ -982,52 +982,52 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    pushq %rbx
 ; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
 ; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi
-; AVX-NEXT:    movq %rdx, %rax
-; AVX-NEXT:    shrq $56, %rax
+; AVX-NEXT:    movq %rcx, %rax
+; AVX-NEXT:    shrq $48, %rax
 ; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    movq %rdx, %rcx
-; AVX-NEXT:    shrq $48, %rcx
-; AVX-NEXT:    andl $15, %ecx
-; AVX-NEXT:    movq %rdx, %rsi
-; AVX-NEXT:    shrq $40, %rsi
+; AVX-NEXT:    movq %rcx, %rdx
+; AVX-NEXT:    shrq $40, %rdx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    movq %rcx, %rsi
+; AVX-NEXT:    shrq $32, %rsi
 ; AVX-NEXT:    andl $15, %esi
-; AVX-NEXT:    movq %rdx, %r8
-; AVX-NEXT:    shrq $32, %r8
-; AVX-NEXT:    andl $15, %r8d
 ; AVX-NEXT:    movq %rdi, %r9
-; AVX-NEXT:    shrq $56, %r9
+; AVX-NEXT:    shrq $48, %r9
 ; AVX-NEXT:    andl $15, %r9d
 ; AVX-NEXT:    movq %rdi, %r10
-; AVX-NEXT:    shrq $48, %r10
+; AVX-NEXT:    shrq $40, %r10
 ; AVX-NEXT:    andl $15, %r10d
 ; AVX-NEXT:    movq %rdi, %r11
-; AVX-NEXT:    shrq $40, %r11
+; AVX-NEXT:    shrq $32, %r11
 ; AVX-NEXT:    andl $15, %r11d
+; AVX-NEXT:    movq %rcx, %r8
+; AVX-NEXT:    shrq $56, %r8
+; AVX-NEXT:    andl $15, %r8d
 ; AVX-NEXT:    movq %rdi, %rbx
-; AVX-NEXT:    shrq $32, %rbx
+; AVX-NEXT:    shrq $56, %rbx
 ; AVX-NEXT:    andl $15, %ebx
-; AVX-NEXT:    shlq $32, %rbx
+; AVX-NEXT:    shlq $32, %r11
 ; AVX-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; AVX-NEXT:    orq %rbx, %rdi
-; AVX-NEXT:    shlq $40, %r11
-; AVX-NEXT:    orq %rdi, %r11
-; AVX-NEXT:    shlq $48, %r10
-; AVX-NEXT:    orq %r11, %r10
-; AVX-NEXT:    shlq $56, %r9
+; AVX-NEXT:    orq %r11, %rdi
+; AVX-NEXT:    shlq $40, %r10
+; AVX-NEXT:    orq %rdi, %r10
+; AVX-NEXT:    shlq $48, %r9
 ; AVX-NEXT:    orq %r10, %r9
-; AVX-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    shlq $32, %r8
-; AVX-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; AVX-NEXT:    orq %r8, %rdx
-; AVX-NEXT:    shlq $40, %rsi
-; AVX-NEXT:    orq %rdx, %rsi
-; AVX-NEXT:    shlq $48, %rcx
+; AVX-NEXT:    shlq $56, %rbx
+; AVX-NEXT:    orq %r9, %rbx
+; AVX-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    shlq $32, %rsi
+; AVX-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
 ; AVX-NEXT:    orq %rsi, %rcx
-; AVX-NEXT:    shlq $56, %rax
-; AVX-NEXT:    orq %rcx, %rax
-; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    shlq $40, %rdx
+; AVX-NEXT:    orq %rcx, %rdx
+; AVX-NEXT:    shlq $48, %rax
+; AVX-NEXT:    orq %rdx, %rax
+; AVX-NEXT:    shlq $56, %r8
+; AVX-NEXT:    orq %rax, %r8
+; AVX-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT:    vinsertf128 $0, -{{[0-9]+}}(%rsp), %ymm0, %ymm0
 ; AVX-NEXT:    popq %rbx
 ; AVX-NEXT:    retq

diff --git a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
index 649d3f8b48292..05ad92cc0b330 100644
--- a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
+++ b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
@@ -114,12 +114,8 @@ define void @i56_or(ptr %a) {
 ;
 ; X64-LABEL: i56_or:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl 6(%rdi), %eax
-; X64-NEXT:    shll $16, %eax
-; X64-NEXT:    movzwl 4(%rdi), %ecx
-; X64-NEXT:    movw %cx, 4(%rdi)
-; X64-NEXT:    shrq $16, %rax
-; X64-NEXT:    movb %al, 6(%rdi)
+; X64-NEXT:    movzwl 4(%rdi), %eax
+; X64-NEXT:    movw %ax, 4(%rdi)
 ; X64-NEXT:    orl $384, (%rdi) # imm = 0x180
 ; X64-NEXT:    retq
   %aa = load i56, ptr %a, align 1
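
The effect on i56_or: the redundant reload-and-rewrite of byte 6 (movzbl/shll/shrq/movb) is gone, leaving only the round-trip of bytes 4-5 plus the orl of the low 32 bits. For reference, the rest of the test body, reconstructed from the emitted code (only the load line is visible in the context above, so the or/store/ret lines are an assumption):

    %b = or i56 %aa, 384
    store i56 %b, ptr %a, align 1
    ret void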

diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index 25d26372af4b3..14619d13ba3dc 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -679,6 +679,8 @@ define i64 @signbits_cmpsd(double %0, double %1) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    vmovq %xmm0, %rax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negq %rax
 ; X64-NEXT:    retq
   %3 = fcmp oeq double %0, %1
   %4 = sext i1 %3 to i64
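
Unlike the previous test, this one is a small regression, apparently accepted as the cost of preserving the any_extend: vcmpeqsd already produces an all-ones/all-zeros mask, and the old all-uses replacement let the sign-bit analysis prove that no further work was needed, whereas now the sext i1 is materialized explicitly (andl $1 isolates the compare bit, negq expands it back to 0 or -1). The complete test, with the trailing ret inferred from the visible body (an assumption), is presumably:

    define i64 @signbits_cmpsd(double %0, double %1) {
      %3 = fcmp oeq double %0, %1
      %4 = sext i1 %3 to i64
      ret i64 %4
    }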

diff --git a/llvm/test/CodeGen/X86/ushl_sat.ll b/llvm/test/CodeGen/X86/ushl_sat.ll
index a4f0656c13aff..e0e1ef7108d0d 100644
--- a/llvm/test/CodeGen/X86/ushl_sat.ll
+++ b/llvm/test/CodeGen/X86/ushl_sat.ll
@@ -14,10 +14,9 @@ define i16 @func(i16 %x, i16 %y) nounwind {
 ; X64-LABEL: func:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shll %cl, %eax
-; X64-NEXT:    movzwl %ax, %edx
-; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    movl %edi, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movzwl %dx, %eax
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    cmpw %ax, %di
@@ -33,8 +32,7 @@ define i16 @func(i16 %x, i16 %y) nounwind {
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %edx
-; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movzwl %dx, %esi
 ; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    cmpw %si, %ax
 ; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
@@ -54,8 +52,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X64-NEXT:    addl %eax, %eax
 ; X64-NEXT:    movl %eax, %edx
 ; X64-NEXT:    shll %cl, %edx
-; X64-NEXT:    movzwl %dx, %edx
-; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    movzwl %dx, %esi
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %esi
 ; X64-NEXT:    cmpw %si, %ax
@@ -74,8 +71,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %edx
-; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movzwl %dx, %esi
 ; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    cmpw %si, %ax
 ; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
@@ -100,8 +96,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X64-NEXT:    addl %edi, %edi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shll %cl, %eax
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    movzwl %ax, %edx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %edx
 ; X64-NEXT:    cmpw %dx, %di
@@ -121,8 +116,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %edx
-; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movzwl %dx, %esi
 ; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    cmpw %si, %ax
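
Across func, func2, and func3 the change is identical and mildly positive: the zero-extended shift result is now produced directly in its destination register (movzwl %dx, %esi) rather than being extended in place and then copied (movzwl %dx, %edx / movl %edx, %esi), saving one register move per shifted value. The vector test below shows the same saving repeated per lane, along with the register-allocation churn that follows from it.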

diff --git a/llvm/test/CodeGen/X86/ushl_sat_vec.ll b/llvm/test/CodeGen/X86/ushl_sat_vec.ll
index a73fa32e21013..8df101852f06b 100644
--- a/llvm/test/CodeGen/X86/ushl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ushl_sat_vec.ll
@@ -300,100 +300,91 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    movl %eax, %edi
-; X86-NEXT:    shll %cl, %edi
-; X86-NEXT:    movzwl %di, %ebx
-; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movzwl %bx, %edi
 ; X86-NEXT:    shrl %cl, %edi
 ; X86-NEXT:    cmpw %di, %ax
 ; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
 ; X86-NEXT:    cmovnel %eax, %ebx
 ; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %esi, %edi
-; X86-NEXT:    movb %ch, %cl
-; X86-NEXT:    shll %cl, %edi
-; X86-NEXT:    movzwl %di, %eax
-; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movzwl %ax, %edi
 ; X86-NEXT:    shrl %cl, %edi
 ; X86-NEXT:    cmpw %di, %si
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl $65535, %edi # imm = 0xFFFF
-; X86-NEXT:    cmovnel %edi, %eax
+; X86-NEXT:    movl $65535, %esi # imm = 0xFFFF
+; X86-NEXT:    cmovnel %esi, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    shll %cl, %esi
-; X86-NEXT:    movzwl %si, %eax
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    shrl %cl, %esi
-; X86-NEXT:    cmpw %si, %dx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movzwl %ax, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    cmpw %dx, %bp
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    cmovnel %edi, %eax
+; X86-NEXT:    cmovnel %esi, %eax
 ; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT:    movl $65535, %esi # imm = 0xFFFF
-; X86-NEXT:    movl %ebp, %eax
-; X86-NEXT:    movl %ebp, %edx
-; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %ebp
-; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movzwl %bp, %edx
 ; X86-NEXT:    shrl %cl, %edx
 ; X86-NEXT:    cmpw %dx, %ax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    cmovnel %esi, %ebp
-; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    shll %cl, %esi
-; X86-NEXT:    movzwl %si, %ebx
-; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movzwl %bx, %esi
 ; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    cmpw %si, %dx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    cmovnel %eax, %ebx
+; X86-NEXT:    movl $65535, %esi # imm = 0xFFFF
+; X86-NEXT:    cmovnel %esi, %ebx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    shll %cl, %esi
-; X86-NEXT:    movzwl %si, %edi
-; X86-NEXT:    movl %edi, %esi
-; X86-NEXT:    shrl %cl, %esi
-; X86-NEXT:    cmpw %si, %dx
-; X86-NEXT:    cmovnel %eax, %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movzwl %di, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    cmpw %ax, %dx
+; X86-NEXT:    cmovnel %esi, %edi
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl %esi, %edx
-; X86-NEXT:    shrl %cl, %edx
-; X86-NEXT:    cmpw %dx, %ax
-; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; X86-NEXT:    cmovnel %eax, %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %edx
-; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movzwl %dx, %eax
 ; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    cmpw %ax, %cx
+; X86-NEXT:    cmpw %ax, %si
 ; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
 ; X86-NEXT:    cmovnel %eax, %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movw %dx, 14(%eax)
-; X86-NEXT:    movw %si, 12(%eax)
-; X86-NEXT:    movw %di, 10(%eax)
-; X86-NEXT:    movw %bx, 8(%eax)
-; X86-NEXT:    movw %bp, 6(%eax)
-; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
-; X86-NEXT:    movw %cx, 4(%eax)
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    movw %cx, 2(%eax)
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    movw %cx, (%eax)
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movzwl %ax, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmpw %si, %cx
+; X86-NEXT:    movl $65535, %ecx # imm = 0xFFFF
+; X86-NEXT:    cmovnel %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movw %ax, 14(%ecx)
+; X86-NEXT:    movw %dx, 12(%ecx)
+; X86-NEXT:    movw %di, 10(%ecx)
+; X86-NEXT:    movw %bx, 8(%ecx)
+; X86-NEXT:    movw %bp, 6(%ecx)
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movw %ax, 4(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movw %ax, 2(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movw %ax, (%ecx)
+; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi


        

