[llvm] r313982 - [X86] Combining CMOVs with [ANY, SIGN, ZERO]_EXTEND for cases where CMOV has constant arguments

Alexander Ivchenko via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 22 06:21:39 PDT 2017


Author: aivchenk
Date: Fri Sep 22 06:21:39 2017
New Revision: 313982

URL: http://llvm.org/viewvc/llvm-project?rev=313982&view=rev
Log:
[X86] Combining CMOVs with [ANY,SIGN,ZERO]_EXTEND for cases where CMOV has constant arguments

Combine CMOV[i16]<-[SIGN,ZERO,ANY]_EXTEND to [i32,i64] into CMOV[i32,i64].
One example of where it is useful is:

before (20 bytes)
    <foo>:
    test $0x1,%dil
    mov $0x307e,%ax
    mov $0xffff,%cx
    cmovne %ax,%cx
    movzwl %cx,%eax
    retq

after (18 bytes)
    <foo>:
    test $0x1,%dil
    mov $0x307e,%ecx
    mov $0xffff,%eax
    cmovne %ecx,%eax
    retq

Reviewers: craig.topper, aaboud, spatel, RKSimon, zvi

Reviewed By: spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D36711


Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/cmov-promotion.ll
    llvm/trunk/test/CodeGen/X86/select.ll
    llvm/trunk/test/CodeGen/X86/vector-compare-results.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=313982&r1=313981&r2=313982&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 22 06:21:39 2017
@@ -34491,6 +34491,47 @@ static SDValue getDivRem8(SDNode *N, Sel
   return R.getValue(1);
 }
 
+// Fold {ANY,SIGN,ZERO}_EXTEND(CMOV(C0, C1, ...)) into a CMOV of TargetVT whose
+// value operands are the extended constants. Applies only when the CMOV is
+// i16, has a single use, both value operands are constants, and TargetVT is
+// i32 or i64; otherwise returns an empty SDValue. CMOV operands 2 and 3 are
+// forwarded to the new node unchanged. This could be beneficial, because:
+//     1) X86TargetLowering::EmitLoweredSelect can later merge two (or more)
+//        pseudo-CMOVs only when they are adjacent, and removing the result
+//        extension that follows a CMOV helps with that.
+//     2) Extending constant CMOV operands is free, so the extend node simply
+//        disappears.
+//     3) A 16-bit CMOV encodes in 4 bytes and a 32-bit CMOV in 3, so this also
+//        reduces code size. A 64-bit CMOV is 4 bytes, hence no i32 => i64.
+static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) {
+  SDValue CMovN = Extend->getOperand(0);
+  if (CMovN.getOpcode() != X86ISD::CMOV)
+    return SDValue();
+
+  EVT TargetVT = Extend->getValueType(0);
+  unsigned ExtendOpcode = Extend->getOpcode();
+  SDLoc DL(Extend);
+
+  EVT VT = CMovN.getValueType();
+  SDValue CMovOp0 = CMovN.getOperand(0);
+  SDValue CMovOp1 = CMovN.getOperand(1);
+
+  bool DoPromoteCMOV =
+      (VT == MVT::i16 && (TargetVT == MVT::i32 || TargetVT == MVT::i64)) &&
+      CMovN.hasOneUse() &&
+      (isa<ConstantSDNode>(CMovOp0.getNode()) &&
+       isa<ConstantSDNode>(CMovOp1.getNode()));
+
+  if (!DoPromoteCMOV)
+    return SDValue();
+
+  CMovOp0 = DAG.getNode(ExtendOpcode, DL, TargetVT, CMovOp0);
+  CMovOp1 = DAG.getNode(ExtendOpcode, DL, TargetVT, CMovOp1);
+
+  return DAG.getNode(X86ISD::CMOV, DL, TargetVT, CMovOp0, CMovOp1,
+                     CMovN.getOperand(2), CMovN.getOperand(3));
+}
+
 /// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or
 /// ZERO_EXTEND_VECTOR_INREG, this requires the splitting (or concatenating
 /// with UNDEFs) of the input to vectors of the same size as the target type
@@ -34605,6 +34646,9 @@ static SDValue combineSext(SDNode *N, Se
   if (SDValue DivRem8 = getDivRem8(N, DAG))
     return DivRem8;
 
+  if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
+    return NewCMov;
+
   if (!DCI.isBeforeLegalizeOps()) {
     if (InVT == MVT::i1) {
       SDValue Zero = DAG.getConstant(0, DL, VT);
@@ -34757,6 +34801,9 @@ static SDValue combineZext(SDNode *N, Se
     }
   }
 
+  if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
+    return NewCMov;
+
   if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
     return V;
 

Modified: llvm/trunk/test/CodeGen/X86/cmov-promotion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmov-promotion.ll?rev=313982&r1=313981&r2=313982&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmov-promotion.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmov-promotion.ll Fri Sep 22 06:21:39 2017
@@ -90,21 +90,19 @@ define i32 @cmov_zpromotion_16_to_32(i1
 ; CMOV-LABEL: cmov_zpromotion_16_to_32:
 ; CMOV:       # BB#0:
 ; CMOV-NEXT:    testb $1, %dil
-; CMOV-NEXT:    movw $12414, %ax # imm = 0x307E
-; CMOV-NEXT:    movw $-1, %cx
-; CMOV-NEXT:    cmovnew %ax, %cx
-; CMOV-NEXT:    movzwl %cx, %eax
+; CMOV-NEXT:    movl $12414, %ecx # imm = 0x307E
+; CMOV-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; CMOV-NEXT:    cmovnel %ecx, %eax
 ; CMOV-NEXT:    retq
 ;
 ; NO_CMOV-LABEL: cmov_zpromotion_16_to_32:
 ; NO_CMOV:       # BB#0:
 ; NO_CMOV-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; NO_CMOV-NEXT:    movw $12414, %ax # imm = 0x307E
+; NO_CMOV-NEXT:    movl $12414, %eax # imm = 0x307E
 ; NO_CMOV-NEXT:    jne .LBB3_2
 ; NO_CMOV-NEXT:  # BB#1:
-; NO_CMOV-NEXT:    movw $-1, %ax
+; NO_CMOV-NEXT:    movl $65535, %eax # imm = 0xFFFF
 ; NO_CMOV-NEXT:  .LBB3_2:
-; NO_CMOV-NEXT:    movzwl %ax, %eax
 ; NO_CMOV-NEXT:    retl
   %t0 = select i1 %c, i16 12414, i16 -1
   %ret = zext i16 %t0 to i32
@@ -115,21 +113,19 @@ define i64 @cmov_zpromotion_16_to_64(i1
 ; CMOV-LABEL: cmov_zpromotion_16_to_64:
 ; CMOV:       # BB#0:
 ; CMOV-NEXT:    testb $1, %dil
-; CMOV-NEXT:    movw $12414, %ax # imm = 0x307E
-; CMOV-NEXT:    movw $-1, %cx
-; CMOV-NEXT:    cmovnew %ax, %cx
-; CMOV-NEXT:    movzwl %cx, %eax
+; CMOV-NEXT:    movl $12414, %ecx # imm = 0x307E
+; CMOV-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; CMOV-NEXT:    cmovneq %rcx, %rax
 ; CMOV-NEXT:    retq
 ;
 ; NO_CMOV-LABEL: cmov_zpromotion_16_to_64:
 ; NO_CMOV:       # BB#0:
 ; NO_CMOV-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; NO_CMOV-NEXT:    movw $12414, %ax # imm = 0x307E
+; NO_CMOV-NEXT:    movl $12414, %eax # imm = 0x307E
 ; NO_CMOV-NEXT:    jne .LBB4_2
 ; NO_CMOV-NEXT:  # BB#1:
-; NO_CMOV-NEXT:    movw $-1, %ax
+; NO_CMOV-NEXT:    movl $65535, %eax # imm = 0xFFFF
 ; NO_CMOV-NEXT:  .LBB4_2:
-; NO_CMOV-NEXT:    movzwl %ax, %eax
 ; NO_CMOV-NEXT:    xorl %edx, %edx
 ; NO_CMOV-NEXT:    retl
   %t0 = select i1 %c, i16 12414, i16 -1
@@ -250,21 +246,19 @@ define i32 @cmov_spromotion_16_to_32(i1
 ; CMOV-LABEL: cmov_spromotion_16_to_32:
 ; CMOV:       # BB#0:
 ; CMOV-NEXT:    testb $1, %dil
-; CMOV-NEXT:    movw $12414, %ax # imm = 0x307E
-; CMOV-NEXT:    movw $-1, %cx
-; CMOV-NEXT:    cmovnew %ax, %cx
-; CMOV-NEXT:    movswl %cx, %eax
+; CMOV-NEXT:    movl $12414, %ecx # imm = 0x307E
+; CMOV-NEXT:    movl $-1, %eax
+; CMOV-NEXT:    cmovnel %ecx, %eax
 ; CMOV-NEXT:    retq
 ;
 ; NO_CMOV-LABEL: cmov_spromotion_16_to_32:
 ; NO_CMOV:       # BB#0:
 ; NO_CMOV-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; NO_CMOV-NEXT:    movw $12414, %ax # imm = 0x307E
+; NO_CMOV-NEXT:    movl $12414, %eax # imm = 0x307E
 ; NO_CMOV-NEXT:    jne .LBB9_2
 ; NO_CMOV-NEXT:  # BB#1:
-; NO_CMOV-NEXT:    movw $-1, %ax
+; NO_CMOV-NEXT:    movl $-1, %eax
 ; NO_CMOV-NEXT:  .LBB9_2:
-; NO_CMOV-NEXT:    cwtl
 ; NO_CMOV-NEXT:    retl
   %t0 = select i1 %c, i16 12414, i16 -1
   %ret = sext i16 %t0 to i32
@@ -275,21 +269,19 @@ define i64 @cmov_spromotion_16_to_64(i1
 ; CMOV-LABEL: cmov_spromotion_16_to_64:
 ; CMOV:       # BB#0:
 ; CMOV-NEXT:    testb $1, %dil
-; CMOV-NEXT:    movw $12414, %ax # imm = 0x307E
-; CMOV-NEXT:    movw $-1, %cx
-; CMOV-NEXT:    cmovnew %ax, %cx
-; CMOV-NEXT:    movswq %cx, %rax
+; CMOV-NEXT:    movl $12414, %ecx # imm = 0x307E
+; CMOV-NEXT:    movq $-1, %rax
+; CMOV-NEXT:    cmovneq %rcx, %rax
 ; CMOV-NEXT:    retq
 ;
 ; NO_CMOV-LABEL: cmov_spromotion_16_to_64:
 ; NO_CMOV:       # BB#0:
 ; NO_CMOV-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; NO_CMOV-NEXT:    movw $12414, %ax # imm = 0x307E
+; NO_CMOV-NEXT:    movl $12414, %eax # imm = 0x307E
 ; NO_CMOV-NEXT:    jne .LBB10_2
 ; NO_CMOV-NEXT:  # BB#1:
-; NO_CMOV-NEXT:    movw $-1, %ax
+; NO_CMOV-NEXT:    movl $-1, %eax
 ; NO_CMOV-NEXT:  .LBB10_2:
-; NO_CMOV-NEXT:    cwtl
 ; NO_CMOV-NEXT:    movl %eax, %edx
 ; NO_CMOV-NEXT:    sarl $31, %edx
 ; NO_CMOV-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select.ll?rev=313982&r1=313981&r2=313982&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select.ll Fri Sep 22 06:21:39 2017
@@ -39,44 +39,58 @@ define i32 @test1(%0* %p, %0* %q, i1 %r)
 
 ; PR2139
 define i32 @test2() nounwind {
-; CHECK-LABEL: test2:
-; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    callq _return_false
-; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    movw $-480, %ax ## imm = 0xFE20
-; CHECK-NEXT:    cmovnew %cx, %ax
-; CHECK-NEXT:    cwtl
-; CHECK-NEXT:    shll $3, %eax
-; CHECK-NEXT:    cmpl $32768, %eax ## imm = 0x8000
-; CHECK-NEXT:    jge LBB1_1
-; CHECK-NEXT:  ## BB#2: ## %bb91
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    popq %rcx
-; CHECK-NEXT:    retq
-; CHECK-NEXT:  LBB1_1: ## %bb90
-; CHECK-NEXT:    ## -- End function
+; GENERIC-LABEL: test2:
+; GENERIC:       ## BB#0: ## %entry
+; GENERIC-NEXT:    pushq %rax
+; GENERIC-NEXT:    callq _return_false
+; GENERIC-NEXT:    xorl %ecx, %ecx
+; GENERIC-NEXT:    testb $1, %al
+; GENERIC-NEXT:    movl $-480, %eax
+; GENERIC-NEXT:    cmovnel %ecx, %eax
+; GENERIC-NEXT:    shll $3, %eax
+; GENERIC-NEXT:    cmpl $32768, %eax ## imm = 0x8000
+; GENERIC-NEXT:    jge LBB1_1
+; GENERIC-NEXT:  ## BB#2: ## %bb91
+; GENERIC-NEXT:    xorl %eax, %eax
+; GENERIC-NEXT:    popq %rcx
+; GENERIC-NEXT:    retq
+; GENERIC-NEXT:  LBB1_1: ## %bb90
+; GENERIC-NEXT:    ## -- End function
+;
+; ATOM-LABEL: test2:
+; ATOM:       ## BB#0: ## %entry
+; ATOM-NEXT:    pushq %rax
+; ATOM-NEXT:    callq _return_false
+; ATOM-NEXT:    xorl %ecx, %ecx
+; ATOM-NEXT:    movl $-480, %edx
+; ATOM-NEXT:    testb $1, %al
+; ATOM-NEXT:    cmovnel %ecx, %edx
+; ATOM-NEXT:    shll $3, %edx
+; ATOM-NEXT:    cmpl $32768, %edx ## imm = 0x8000
+; ATOM-NEXT:    jge LBB1_1
+; ATOM-NEXT:  ## BB#2: ## %bb91
+; ATOM-NEXT:    xorl %eax, %eax
+; ATOM-NEXT:    popq %rcx
+; ATOM-NEXT:    retq
+; ATOM-NEXT:  LBB1_1: ## %bb90
+; ATOM-NEXT:    ## -- End function
 ;
 ; MCU-LABEL: test2:
 ; MCU:       # BB#0: # %entry
 ; MCU-NEXT:    calll return_false
+; MCU-NEXT:    xorl    %ecx, %ecx
 ; MCU-NEXT:    testb $1, %al
-; MCU-NEXT:    jne .LBB1_1
-; MCU-NEXT:  # BB#2: # %entry
-; MCU-NEXT:    movw $-480, %ax # imm = 0xFE20
-; MCU-NEXT:    jmp .LBB1_3
-; MCU-NEXT:  .LBB1_1:
-; MCU-NEXT:    xorl %eax, %eax
-; MCU-NEXT:  .LBB1_3: # %entry
-; MCU-NEXT:    cwtl
-; MCU-NEXT:    shll $3, %eax
-; MCU-NEXT:    cmpl $32768, %eax # imm = 0x8000
-; MCU-NEXT:    jge .LBB1_4
-; MCU-NEXT:  # BB#5: # %bb91
+; MCU-NEXT:    jne .LBB1_2
+; MCU-NEXT:  # BB#1: # %entry
+; MCU-NEXT:    movl $-480, %ecx # imm = 0xFE20
+; MCU-NEXT:  .LBB1_2:
+; MCU-NEXT:    shll $3, %ecx
+; MCU-NEXT:    cmpl $32768, %ecx # imm = 0x8000
+; MCU-NEXT:    jge .LBB1_3
+; MCU-NEXT:  # BB#4: # %bb91
 ; MCU-NEXT:    xorl %eax, %eax
 ; MCU-NEXT:    retl
-; MCU-NEXT:  .LBB1_4: # %bb90
+; MCU-NEXT:  .LBB1_3: # %bb90
 entry:
   %tmp73 = tail call i1 @return_false()
   %g.0 = select i1 %tmp73, i16 0, i16 -480

Modified: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-results.ll?rev=313982&r1=313981&r2=313982&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll Fri Sep 22 06:21:39 2017
@@ -5791,51 +5791,51 @@ define <64 x i1> @test_cmp_v64i16(<64 x
 ; AVX512BW-NEXT:    vpextrw $1, %xmm5, %edx
 ; AVX512BW-NEXT:    xorl %eax, %eax
 ; AVX512BW-NEXT:    cmpw %cx, %dx
-; AVX512BW-NEXT:    movw $-1, %cx
+; AVX512BW-NEXT:    movl $65535, %ecx # imm = 0xFFFF
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vmovd %xmm4, %esi
 ; AVX512BW-NEXT:    vmovd %xmm5, %edi
 ; AVX512BW-NEXT:    cmpw %si, %di
 ; AVX512BW-NEXT:    movl $0, %esi
-; AVX512BW-NEXT:    cmovgw %cx, %si
+; AVX512BW-NEXT:    cmovgl %ecx, %esi
 ; AVX512BW-NEXT:    vmovd %esi, %xmm6
 ; AVX512BW-NEXT:    vpinsrw $1, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $2, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $2, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $2, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $3, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $3, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $3, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $4, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $4, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $4, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $5, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $5, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $5, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $6, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $6, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $6, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $7, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $7, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $7, %edx, %xmm6, %xmm4
 ; AVX512BW-NEXT:    vextracti32x4 $2, %zmm2, %xmm5
 ; AVX512BW-NEXT:    vpextrw $1, %xmm5, %edx
@@ -5843,49 +5843,49 @@ define <64 x i1> @test_cmp_v64i16(<64 x
 ; AVX512BW-NEXT:    vpextrw $1, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vmovd %xmm5, %esi
 ; AVX512BW-NEXT:    vmovd %xmm6, %edi
 ; AVX512BW-NEXT:    cmpw %si, %di
 ; AVX512BW-NEXT:    movl $0, %esi
-; AVX512BW-NEXT:    cmovgw %cx, %si
+; AVX512BW-NEXT:    cmovgl %ecx, %esi
 ; AVX512BW-NEXT:    vmovd %esi, %xmm7
 ; AVX512BW-NEXT:    vpinsrw $1, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $2, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $2, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $2, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $3, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $3, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $3, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $4, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $4, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $4, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $5, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $5, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $5, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $6, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $6, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $6, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $7, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $7, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $7, %edx, %xmm7, %xmm5
 ; AVX512BW-NEXT:    vinserti128 $1, %xmm4, %ymm5, %ymm4
 ; AVX512BW-NEXT:    vextracti128 $1, %ymm2, %xmm5
@@ -5894,97 +5894,97 @@ define <64 x i1> @test_cmp_v64i16(<64 x
 ; AVX512BW-NEXT:    vpextrw $1, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vmovd %xmm5, %esi
 ; AVX512BW-NEXT:    vmovd %xmm6, %edi
 ; AVX512BW-NEXT:    cmpw %si, %di
 ; AVX512BW-NEXT:    movl $0, %esi
-; AVX512BW-NEXT:    cmovgw %cx, %si
+; AVX512BW-NEXT:    cmovgl %ecx, %esi
 ; AVX512BW-NEXT:    vmovd %esi, %xmm7
 ; AVX512BW-NEXT:    vpinsrw $1, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $2, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $2, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $2, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $3, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $3, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $3, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $4, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $4, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $4, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $5, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $5, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $5, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $6, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $6, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $6, %edx, %xmm7, %xmm7
 ; AVX512BW-NEXT:    vpextrw $7, %xmm5, %edx
 ; AVX512BW-NEXT:    vpextrw $7, %xmm6, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $7, %edx, %xmm7, %xmm5
 ; AVX512BW-NEXT:    vpextrw $1, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $1, %xmm0, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vmovd %xmm2, %esi
 ; AVX512BW-NEXT:    vmovd %xmm0, %edi
 ; AVX512BW-NEXT:    cmpw %si, %di
 ; AVX512BW-NEXT:    movl $0, %esi
-; AVX512BW-NEXT:    cmovgw %cx, %si
+; AVX512BW-NEXT:    cmovgl %ecx, %esi
 ; AVX512BW-NEXT:    vmovd %esi, %xmm6
 ; AVX512BW-NEXT:    vpinsrw $1, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $2, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $2, %xmm0, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $2, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $3, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $3, %xmm0, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $3, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $4, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $4, %xmm0, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $4, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $5, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $5, %xmm0, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $5, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $6, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $6, %xmm0, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $6, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $7, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $7, %xmm0, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $7, %edx, %xmm6, %xmm0
 ; AVX512BW-NEXT:    vinserti128 $1, %xmm5, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm4, %zmm0, %zmm0
@@ -5995,49 +5995,49 @@ define <64 x i1> @test_cmp_v64i16(<64 x
 ; AVX512BW-NEXT:    vpextrw $1, %xmm4, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vmovd %xmm2, %esi
 ; AVX512BW-NEXT:    vmovd %xmm4, %edi
 ; AVX512BW-NEXT:    cmpw %si, %di
 ; AVX512BW-NEXT:    movl $0, %esi
-; AVX512BW-NEXT:    cmovgw %cx, %si
+; AVX512BW-NEXT:    cmovgl %ecx, %esi
 ; AVX512BW-NEXT:    vmovd %esi, %xmm5
 ; AVX512BW-NEXT:    vpinsrw $1, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $2, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $2, %xmm4, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $2, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $3, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $3, %xmm4, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $3, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $4, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $4, %xmm4, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $4, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $5, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $5, %xmm4, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $5, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $6, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $6, %xmm4, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $6, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $7, %xmm2, %edx
 ; AVX512BW-NEXT:    vpextrw $7, %xmm4, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $7, %edx, %xmm5, %xmm2
 ; AVX512BW-NEXT:    vextracti32x4 $2, %zmm3, %xmm4
 ; AVX512BW-NEXT:    vpextrw $1, %xmm4, %edx
@@ -6045,49 +6045,49 @@ define <64 x i1> @test_cmp_v64i16(<64 x
 ; AVX512BW-NEXT:    vpextrw $1, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vmovd %xmm4, %esi
 ; AVX512BW-NEXT:    vmovd %xmm5, %edi
 ; AVX512BW-NEXT:    cmpw %si, %di
 ; AVX512BW-NEXT:    movl $0, %esi
-; AVX512BW-NEXT:    cmovgw %cx, %si
+; AVX512BW-NEXT:    cmovgl %ecx, %esi
 ; AVX512BW-NEXT:    vmovd %esi, %xmm6
 ; AVX512BW-NEXT:    vpinsrw $1, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $2, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $2, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $2, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $3, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $3, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $3, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $4, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $4, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $4, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $5, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $5, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $5, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $6, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $6, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $6, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $7, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $7, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $7, %edx, %xmm6, %xmm4
 ; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm4, %ymm2
 ; AVX512BW-NEXT:    vextracti128 $1, %ymm3, %xmm4
@@ -6096,96 +6096,96 @@ define <64 x i1> @test_cmp_v64i16(<64 x
 ; AVX512BW-NEXT:    vpextrw $1, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vmovd %xmm4, %esi
 ; AVX512BW-NEXT:    vmovd %xmm5, %edi
 ; AVX512BW-NEXT:    cmpw %si, %di
 ; AVX512BW-NEXT:    movl $0, %esi
-; AVX512BW-NEXT:    cmovgw %cx, %si
+; AVX512BW-NEXT:    cmovgl %ecx, %esi
 ; AVX512BW-NEXT:    vmovd %esi, %xmm6
 ; AVX512BW-NEXT:    vpinsrw $1, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $2, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $2, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $2, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $3, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $3, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $3, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $4, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $4, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $4, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $5, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $5, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $5, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $6, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $6, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $6, %edx, %xmm6, %xmm6
 ; AVX512BW-NEXT:    vpextrw $7, %xmm4, %edx
 ; AVX512BW-NEXT:    vpextrw $7, %xmm5, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $7, %edx, %xmm6, %xmm4
 ; AVX512BW-NEXT:    vpextrw $1, %xmm3, %edx
 ; AVX512BW-NEXT:    vpextrw $1, %xmm1, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vmovd %xmm3, %esi
 ; AVX512BW-NEXT:    vmovd %xmm1, %edi
 ; AVX512BW-NEXT:    cmpw %si, %di
 ; AVX512BW-NEXT:    movl $0, %esi
-; AVX512BW-NEXT:    cmovgw %cx, %si
+; AVX512BW-NEXT:    cmovgl %ecx, %esi
 ; AVX512BW-NEXT:    vmovd %esi, %xmm5
 ; AVX512BW-NEXT:    vpinsrw $1, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $2, %xmm3, %edx
 ; AVX512BW-NEXT:    vpextrw $2, %xmm1, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $2, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $3, %xmm3, %edx
 ; AVX512BW-NEXT:    vpextrw $3, %xmm1, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $3, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $4, %xmm3, %edx
 ; AVX512BW-NEXT:    vpextrw $4, %xmm1, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $4, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $5, %xmm3, %edx
 ; AVX512BW-NEXT:    vpextrw $5, %xmm1, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $5, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $6, %xmm3, %edx
 ; AVX512BW-NEXT:    vpextrw $6, %xmm1, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
 ; AVX512BW-NEXT:    movl $0, %edx
-; AVX512BW-NEXT:    cmovgw %cx, %dx
+; AVX512BW-NEXT:    cmovgl %ecx, %edx
 ; AVX512BW-NEXT:    vpinsrw $6, %edx, %xmm5, %xmm5
 ; AVX512BW-NEXT:    vpextrw $7, %xmm3, %edx
 ; AVX512BW-NEXT:    vpextrw $7, %xmm1, %esi
 ; AVX512BW-NEXT:    cmpw %dx, %si
-; AVX512BW-NEXT:    cmovgw %cx, %ax
+; AVX512BW-NEXT:    cmovgl %ecx, %eax
 ; AVX512BW-NEXT:    vpinsrw $7, %eax, %xmm5, %xmm1
 ; AVX512BW-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1




More information about the llvm-commits mailing list