[llvm] 3d00e1e - [X86] LowerSELECTWithCmpZero - fold "SELECT (AND(X,1) == 0), 0, -1 -> NEG(AND(X,1))"

Simon Pilgrim via llvm-commits <llvm-commits at lists.llvm.org>
Sun Sep 8 02:40:29 PDT 2024


Author: Simon Pilgrim
Date: 2024-09-08T10:08:50+01:00
New Revision: 3d00e1e208413738fc978d0a4b4ff853c0413618

URL: https://github.com/llvm/llvm-project/commit/3d00e1e208413738fc978d0a4b4ff853c0413618
DIFF: https://github.com/llvm/llvm-project/commit/3d00e1e208413738fc978d0a4b4ff853c0413618.diff

LOG: [X86] LowerSELECTWithCmpZero - fold "SELECT (AND(X,1) == 0), 0, -1 -> NEG(AND(X,1))"
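
The underlying identity: when the compared value is (and x, 1), selecting
between 0 and -1 on the zero flag is the same as sign-splatting the low bit,
since -(x & 1) is 0 when the bit is clear and all-ones when it is set. A
minimal standalone C++ check of the identity (illustrative only, not part of
the commit):

    #include <cassert>
    #include <cstdint>

    // SELECT ((x & 1) == 0), 0, -1  ==  -(x & 1)
    uint32_t selectForm(uint32_t x) { return (x & 1) == 0 ? 0u : ~0u; }
    uint32_t negForm(uint32_t x) { return 0u - (x & 1); }

    int main() {
      for (uint32_t x = 0; x < 1024; ++x)
        assert(selectForm(x) == negForm(x));
      return 0;
    }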

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx512-calling-conv.ll
    llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
    llvm/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/test/CodeGen/X86/pr43507.ll
    llvm/test/CodeGen/X86/setcc-lowering.ll
    llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
    llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3b90ab3acddbd6..b8a6f10cab623d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24086,6 +24086,23 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
 
   if (X86CC == X86::COND_E && CmpVal.getOpcode() == ISD::AND &&
       isOneConstant(CmpVal.getOperand(1))) {
+    auto SplatLSB = [&]() {
+      // We need a mask of all zeros or all ones with the same size as the
+      // other operands.
+      SDValue Neg = CmpVal;
+      if (CmpVT.bitsGT(VT))
+        Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpVal);
+      else if (CmpVT.bitsLT(VT))
+        Neg = DAG.getNode(
+            ISD::AND, DL, VT,
+            DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpVal.getOperand(0)),
+            DAG.getConstant(1, DL, VT));
+      return DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
+    };
+
+    // SELECT (AND(X,1) == 0), 0, -1 -> NEG(AND(X,1))
+    if (isNullConstant(LHS) && isAllOnesConstant(RHS))
+      return SplatLSB();
 
     SDValue Src1, Src2;
     auto isIdentityPattern = [&]() {
@@ -24116,17 +24133,7 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
     // SELECT (AND(X,1) == 0), Y, (ADD Y, Z) -> (ADD Y, (AND NEG(AND(X,1)), Z))
     // SELECT (AND(X,1) == 0), Y, (SUB Y, Z) -> (SUB Y, (AND NEG(AND(X,1)), Z))
     if (!Subtarget.canUseCMOV() && isIdentityPattern()) {
-      // we need mask of all zeros or ones with same size of the other
-      // operands.
-      SDValue Neg = CmpVal;
-      if (CmpVT.bitsGT(VT))
-        Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpVal);
-      else if (CmpVT.bitsLT(VT))
-        Neg = DAG.getNode(
-            ISD::AND, DL, VT,
-            DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpVal.getOperand(0)),
-            DAG.getConstant(1, DL, VT));
-      SDValue Mask = DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
+      SDValue Mask = SplatLSB();
       SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
       return DAG.getNode(RHS.getOpcode(), DL, VT, Src2, And);  // y Op And
     }

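The extracted SplatLSB helper also covers the case where the compared value
and the select result have different widths: it truncates when CmpVT is wider
than VT, and any-extends then re-masks bit 0 when CmpVT is narrower, before
negating. A rough scalar sketch of the same width handling (hypothetical
names, with unsigned integer types standing in for the DAG value types):

    #include <cstdint>
    #include <type_traits>

    // Splat the LSB of x across a value of type TOut: 0 or all-ones.
    template <typename TOut, typename TIn>
    TOut splatLSB(TIn x) {
      static_assert(std::is_unsigned_v<TIn> && std::is_unsigned_v<TOut>,
                    "sketch assumes unsigned types");
      // Truncating or extending preserves bit 0, so the low bit can be
      // masked after the width change, as the lowering code does.
      TOut bit = static_cast<TOut>(x) & TOut(1);
      return TOut(0) - bit; // -(x & 1)
    }

    // e.g. splatLSB<uint16_t>(uint8_t{3}) == 0xFFFF
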
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index b39b089faa2a5e..c27cced9d5ffa7 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -679,13 +679,6 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    pushq %r13
 ; KNL-NEXT:    pushq %r12
 ; KNL-NEXT:    pushq %rbx
-; KNL-NEXT:    xorl %r10d, %r10d
-; KNL-NEXT:    testb $1, {{[0-9]+}}(%rsp)
-; KNL-NEXT:    movl $65535, %eax ## imm = 0xFFFF
-; KNL-NEXT:    movl $0, %r11d
-; KNL-NEXT:    cmovnel %eax, %r11d
-; KNL-NEXT:    testb $1, {{[0-9]+}}(%rsp)
-; KNL-NEXT:    cmovnel %eax, %r10d
 ; KNL-NEXT:    movq %rdi, %rax
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
 ; KNL-NEXT:    andl $1, %edi
@@ -905,10 +898,10 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    kmovw %ecx, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    korw %k1, %k0, %k0
-; KNL-NEXT:    kmovw %r11d, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
-; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    kmovw %r10d, %k2
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
+; KNL-NEXT:    kandw %k1, %k0, %k0
+; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
+; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
 ; KNL-NEXT:    kandw %k1, %k2, %k1
 ; KNL-NEXT:    kmovw %k1, %edx
 ; KNL-NEXT:    kshiftrw $1, %k0, %k1
@@ -1316,240 +1309,233 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    pushl %edi
 ; KNL_X32-NEXT:    pushl %esi
 ; KNL_X32-NEXT:    subl $16, %esp
-; KNL_X32-NEXT:    xorl %eax, %eax
-; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; KNL_X32-NEXT:    movl $65535, %edx ## imm = 0xFFFF
-; KNL_X32-NEXT:    movl $0, %ecx
-; KNL_X32-NEXT:    cmovnel %edx, %ecx
-; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; KNL_X32-NEXT:    cmovnel %edx, %eax
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    andl $1, %edx
-; KNL_X32-NEXT:    kmovw %edx, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    andl $1, %eax
+; KNL_X32-NEXT:    kmovw %eax, %k0
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $14, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-5, %dx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movw $-5, %ax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $13, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-9, %dx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movw $-9, %ax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $12, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-17, %dx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movw $-17, %ax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $11, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-33, %dx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movw $-33, %ax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $10, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-65, %dx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movw $-65, %ax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $9, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-129, %dx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movw $-129, %ax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $8, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-257, %dx ## imm = 0xFEFF
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movw $-257, %ax ## imm = 0xFEFF
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $7, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-513, %dx ## imm = 0xFDFF
-; KNL_X32-NEXT:    kmovw %edx, %k7
+; KNL_X32-NEXT:    movw $-513, %ax ## imm = 0xFDFF
+; KNL_X32-NEXT:    kmovw %eax, %k7
 ; KNL_X32-NEXT:    kandw %k7, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $6, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-1025, %dx ## imm = 0xFBFF
-; KNL_X32-NEXT:    kmovw %edx, %k4
+; KNL_X32-NEXT:    movw $-1025, %ax ## imm = 0xFBFF
+; KNL_X32-NEXT:    kmovw %eax, %k4
 ; KNL_X32-NEXT:    kandw %k4, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $5, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-2049, %dx ## imm = 0xF7FF
-; KNL_X32-NEXT:    kmovw %edx, %k3
+; KNL_X32-NEXT:    movw $-2049, %ax ## imm = 0xF7FF
+; KNL_X32-NEXT:    kmovw %eax, %k3
 ; KNL_X32-NEXT:    kandw %k3, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $4, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-4097, %dx ## imm = 0xEFFF
-; KNL_X32-NEXT:    kmovw %edx, %k2
+; KNL_X32-NEXT:    movw $-4097, %ax ## imm = 0xEFFF
+; KNL_X32-NEXT:    kmovw %eax, %k2
 ; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $3, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    movw $-8193, %dx ## imm = 0xDFFF
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movw $-8193, %ax ## imm = 0xDFFF
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k5
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k5
 ; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL_X32-NEXT:    kshiftrw $2, %k5, %k5
 ; KNL_X32-NEXT:    korw %k5, %k0, %k5
-; KNL_X32-NEXT:    movw $-16385, %dx ## imm = 0xBFFF
-; KNL_X32-NEXT:    kmovw %edx, %k0
+; KNL_X32-NEXT:    movw $-16385, %ax ## imm = 0xBFFF
+; KNL_X32-NEXT:    kmovw %eax, %k0
 ; KNL_X32-NEXT:    kandw %k0, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $14, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kshiftlw $1, %k5, %k5
 ; KNL_X32-NEXT:    kshiftrw $1, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw %k5, (%esp) ## 2-byte Spill
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    andl $1, %edx
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; KNL_X32-NEXT:    kmovw %ebx, %k5
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    andl $1, %eax
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; KNL_X32-NEXT:    kmovw %ecx, %k5
 ; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL_X32-NEXT:    kshiftrw $14, %k5, %k5
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    korw %k5, %k6, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $8, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $7, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kandw %k7, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k6
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $6, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kandw %k4, %k5, %k4
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k5
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k5
 ; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL_X32-NEXT:    kshiftrw $5, %k5, %k5
 ; KNL_X32-NEXT:    korw %k5, %k4, %k4
 ; KNL_X32-NEXT:    kandw %k3, %k4, %k3
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k4
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k4
 ; KNL_X32-NEXT:    kshiftlw $15, %k4, %k4
 ; KNL_X32-NEXT:    kshiftrw $4, %k4, %k4
 ; KNL_X32-NEXT:    korw %k4, %k3, %k3
 ; KNL_X32-NEXT:    kandw %k2, %k3, %k2
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k3
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k3
 ; KNL_X32-NEXT:    kshiftlw $15, %k3, %k3
 ; KNL_X32-NEXT:    kshiftrw $3, %k3, %k3
 ; KNL_X32-NEXT:    korw %k3, %k2, %k2
 ; KNL_X32-NEXT:    kandw %k1, %k2, %k1
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k2
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k2
 ; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL_X32-NEXT:    kshiftrw $2, %k2, %k2
 ; KNL_X32-NEXT:    korw %k2, %k1, %k1
 ; KNL_X32-NEXT:    kandw %k0, %k1, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $14, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
 ; KNL_X32-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL_X32-NEXT:    kshiftrw $1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT:    kmovw %edx, %k1
+; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
-; KNL_X32-NEXT:    kmovw %ecx, %k1
-; KNL_X32-NEXT:    kmovw (%esp), %k2 ## 2-byte Reload
-; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    kmovw %eax, %k2
+; KNL_X32-NEXT:    kmovw (%esp), %k1 ## 2-byte Reload
+; KNL_X32-NEXT:    kandw %k1, %k0, %k0
+; KNL_X32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; KNL_X32-NEXT:    kmovw {{[0-9]+}}(%esp), %k2
 ; KNL_X32-NEXT:    kandw %k1, %k2, %k1
 ; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL_X32-NEXT:    kmovw %k1, %ebx

diff --git a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
index e43b2f4b4abc46..6c661eb771d1bd 100644
--- a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
@@ -697,12 +697,10 @@ define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) {
 ; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    movzbl (%rdi), %eax
-; AVX512-NEXT:    shrb %al
 ; AVX512-NEXT:    xorl %ecx, %ecx
-; AVX512-NEXT:    testb $1, %al
-; AVX512-NEXT:    movl $255, %eax
-; AVX512-NEXT:    cmovel %ecx, %eax
-; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    btl $1, %eax
+; AVX512-NEXT:    sbbl %ecx, %ecx
+; AVX512-NEXT:    kmovd %ecx, %k0
 ; AVX512-NEXT:    kshiftrb $1, %k0, %k0
 ; AVX512-NEXT:    kshiftlb $7, %k0, %k0
 ; AVX512-NEXT:    kshiftrb $7, %k0, %k0
@@ -712,12 +710,10 @@ define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) {
 ; AVX512NOTDQ-LABEL: load_v3i1_broadcast_1_v1i1_store:
 ; AVX512NOTDQ:       # %bb.0:
 ; AVX512NOTDQ-NEXT:    movzbl (%rdi), %eax
-; AVX512NOTDQ-NEXT:    shrb %al
 ; AVX512NOTDQ-NEXT:    xorl %ecx, %ecx
-; AVX512NOTDQ-NEXT:    testb $1, %al
-; AVX512NOTDQ-NEXT:    movl $255, %eax
-; AVX512NOTDQ-NEXT:    cmovel %ecx, %eax
-; AVX512NOTDQ-NEXT:    kmovd %eax, %k0
+; AVX512NOTDQ-NEXT:    btl $1, %eax
+; AVX512NOTDQ-NEXT:    sbbl %ecx, %ecx
+; AVX512NOTDQ-NEXT:    kmovd %ecx, %k0
 ; AVX512NOTDQ-NEXT:    kshiftrw $1, %k0, %k0
 ; AVX512NOTDQ-NEXT:    kshiftlw $15, %k0, %k0
 ; AVX512NOTDQ-NEXT:    kshiftrw $15, %k0, %k0
@@ -732,10 +728,10 @@ define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) {
 define void @load_v3i1_broadcast_2_v1i1_store(ptr %a0,ptr %a1) {
 ; AVX512-LABEL: load_v3i1_broadcast_2_v1i1_store:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    testb $4, (%rdi)
-; AVX512-NEXT:    movl $255, %ecx
-; AVX512-NEXT:    cmovel %eax, %ecx
+; AVX512-NEXT:    movzbl (%rdi), %eax
+; AVX512-NEXT:    xorl %ecx, %ecx
+; AVX512-NEXT:    btl $2, %eax
+; AVX512-NEXT:    sbbl %ecx, %ecx
 ; AVX512-NEXT:    kmovd %ecx, %k0
 ; AVX512-NEXT:    kshiftrb $2, %k0, %k0
 ; AVX512-NEXT:    kshiftlb $7, %k0, %k0
@@ -745,10 +741,10 @@ define void @load_v3i1_broadcast_2_v1i1_store(ptr %a0,ptr %a1) {
 ;
 ; AVX512NOTDQ-LABEL: load_v3i1_broadcast_2_v1i1_store:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    xorl %eax, %eax
-; AVX512NOTDQ-NEXT:    testb $4, (%rdi)
-; AVX512NOTDQ-NEXT:    movl $255, %ecx
-; AVX512NOTDQ-NEXT:    cmovel %eax, %ecx
+; AVX512NOTDQ-NEXT:    movzbl (%rdi), %eax
+; AVX512NOTDQ-NEXT:    xorl %ecx, %ecx
+; AVX512NOTDQ-NEXT:    btl $2, %eax
+; AVX512NOTDQ-NEXT:    sbbl %ecx, %ecx
 ; AVX512NOTDQ-NEXT:    kmovd %ecx, %k0
 ; AVX512NOTDQ-NEXT:    kshiftrw $2, %k0, %k0
 ; AVX512NOTDQ-NEXT:    kshiftlw $15, %k0, %k0

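The updated checks above use the bt/sbb idiom rather than a cmov: bt copies
the tested bit into the carry flag, and sbb of a register with itself then
computes 0 - CF, i.e. 0 or all-ones. The same mask materialization in
portable C++ (a sketch, not the backend code):

    #include <cstdint>

    // Mirror "btl $i, %eax; sbbl %ecx, %ecx" without branches or cmov.
    uint32_t maskFromBit(uint32_t x, unsigned i) {
      uint32_t cf = (x >> i) & 1u; // btl: CF = bit i of x
      return 0u - cf;              // sbbl %ecx, %ecx: ecx = 0 - CF
    }
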
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 2a77d0238721c0..f2a197cca8ae5b 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -298,10 +298,8 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
 define i16 @test15(ptr%addr) nounwind {
 ; CHECK-LABEL: test15:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    cmpb $0, (%rdi)
-; CHECK-NEXT:    movl $65535, %eax ## imm = 0xFFFF
-; CHECK-NEXT:    cmovel %ecx, %eax
+; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    negl %eax
 ; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %x = load i1 , ptr %addr, align 1

diff --git a/llvm/test/CodeGen/X86/pr43507.ll b/llvm/test/CodeGen/X86/pr43507.ll
index ec18d3c13ba816..24c27fbd7f8d3e 100644
--- a/llvm/test/CodeGen/X86/pr43507.ll
+++ b/llvm/test/CodeGen/X86/pr43507.ll
@@ -4,11 +4,9 @@
 define <8 x i1> @ham(i64 %arg) {
 ; CHECK-LABEL: ham:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    movl $255, %ecx
-; CHECK-NEXT:    cmovel %eax, %ecx
-; CHECK-NEXT:    kmovd %ecx, %k0
+; CHECK-NEXT:    andb $1, %dil
+; CHECK-NEXT:    negb %dil
+; CHECK-NEXT:    kmovd %edi, %k0
 ; CHECK-NEXT:    vpmovm2w %k0, %xmm0
 ; CHECK-NEXT:    retq
   %tmp = trunc i64 %arg to i1

diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
index aa4fbb469c14fd..5bbc73eacd968e 100644
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -88,27 +88,25 @@ define void @pr26232(i64 %a, <16 x i1> %b) nounwind {
 ;
 ; KNL-32-LABEL: pr26232:
 ; KNL-32:       # %bb.0: # %allocas
-; KNL-32-NEXT:    pushl %esi
 ; KNL-32-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; KNL-32-NEXT:    vpslld $31, %zmm0, %zmm0
 ; KNL-32-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; KNL-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; KNL-32-NEXT:    movl $65535, %edx # imm = 0xFFFF
 ; KNL-32-NEXT:    .p2align 4, 0x90
 ; KNL-32-NEXT:  .LBB1_1: # %for_loop599
 ; KNL-32-NEXT:    # =>This Inner Loop Header: Depth=1
 ; KNL-32-NEXT:    cmpl $65536, %ecx # imm = 0x10000
-; KNL-32-NEXT:    movl %eax, %esi
-; KNL-32-NEXT:    sbbl $0, %esi
-; KNL-32-NEXT:    movl $0, %esi
-; KNL-32-NEXT:    cmovll %edx, %esi
-; KNL-32-NEXT:    kmovw %esi, %k1
+; KNL-32-NEXT:    movl %eax, %edx
+; KNL-32-NEXT:    sbbl $0, %edx
+; KNL-32-NEXT:    setl %dl
+; KNL-32-NEXT:    movzbl %dl, %edx
+; KNL-32-NEXT:    negl %edx
+; KNL-32-NEXT:    kmovw %edx, %k1
 ; KNL-32-NEXT:    kandw %k0, %k1, %k1
 ; KNL-32-NEXT:    kortestw %k1, %k1
 ; KNL-32-NEXT:    jne .LBB1_1
 ; KNL-32-NEXT:  # %bb.2: # %for_exit600
-; KNL-32-NEXT:    popl %esi
 ; KNL-32-NEXT:    retl
 allocas:
   br label %for_test11.preheader

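Here the loop condition is a signed less-than rather than a single bit, so
the mask is materialized through setl, a zero-extend, and a negate instead
of sbb; the 65535 constant and the %esi spill both disappear. Equivalent
scalar form (illustrative names):

    #include <cstdint>

    // Mirror "setl %dl; movzbl %dl, %edx; negl %edx": turn a signed
    // comparison into a 0 / all-ones mask.
    uint32_t maskFromLess(int64_t a, int64_t b) {
      uint32_t lt = a < b ? 1u : 0u; // setl + movzbl
      return 0u - lt;                // negl
    }
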
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index 24113441a4e25a..c71a96f704ac38 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -415,11 +415,10 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
 ; AVX512F-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm2
 ; AVX512F-NEXT:    vcvtph2ps %xmm1, %xmm3
-; AVX512F-NEXT:    xorl %eax, %eax
 ; AVX512F-NEXT:    vucomiss %xmm3, %xmm2
-; AVX512F-NEXT:    movl $255, %ecx
-; AVX512F-NEXT:    cmovbel %eax, %ecx
-; AVX512F-NEXT:    kmovd %ecx, %k1
+; AVX512F-NEXT:    seta %al
+; AVX512F-NEXT:    negb %al
+; AVX512F-NEXT:    kmovd %eax, %k1
 ; AVX512F-NEXT:    vmovdqu16 %zmm0, %zmm1 {%k1}
 ; AVX512F-NEXT:    vmovdqa %xmm1, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -430,11 +429,10 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
 ; AVX512VL-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX512VL-NEXT:    vcvtph2ps %xmm0, %xmm2
 ; AVX512VL-NEXT:    vcvtph2ps %xmm1, %xmm3
-; AVX512VL-NEXT:    xorl %eax, %eax
 ; AVX512VL-NEXT:    vucomiss %xmm3, %xmm2
-; AVX512VL-NEXT:    movl $255, %ecx
-; AVX512VL-NEXT:    cmovbel %eax, %ecx
-; AVX512VL-NEXT:    kmovd %ecx, %k1
+; AVX512VL-NEXT:    seta %al
+; AVX512VL-NEXT:    negb %al
+; AVX512VL-NEXT:    kmovd %eax, %k1
 ; AVX512VL-NEXT:    vmovdqu16 %xmm0, %xmm1 {%k1}
 ; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
 ; AVX512VL-NEXT:    retq

diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index edefb16d40e6ed..2dffe2bf0dfa1f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -416,9 +416,8 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
 ; AVX512F-NEXT:    vcvtph2ps %xmm1, %xmm3
 ; AVX512F-NEXT:    xorl %eax, %eax
 ; AVX512F-NEXT:    vucomiss %xmm3, %xmm2
-; AVX512F-NEXT:    movl $255, %ecx
-; AVX512F-NEXT:    cmovael %eax, %ecx
-; AVX512F-NEXT:    kmovd %ecx, %k1
+; AVX512F-NEXT:    sbbl %eax, %eax
+; AVX512F-NEXT:    kmovd %eax, %k1
 ; AVX512F-NEXT:    vmovdqu16 %zmm0, %zmm1 {%k1}
 ; AVX512F-NEXT:    vmovdqa %xmm1, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -431,9 +430,8 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
 ; AVX512VL-NEXT:    vcvtph2ps %xmm1, %xmm3
 ; AVX512VL-NEXT:    xorl %eax, %eax
 ; AVX512VL-NEXT:    vucomiss %xmm3, %xmm2
-; AVX512VL-NEXT:    movl $255, %ecx
-; AVX512VL-NEXT:    cmovael %eax, %ecx
-; AVX512VL-NEXT:    kmovd %ecx, %k1
+; AVX512VL-NEXT:    sbbl %eax, %eax
+; AVX512VL-NEXT:    kmovd %eax, %k1
 ; AVX512VL-NEXT:    vmovdqu16 %xmm0, %xmm1 {%k1}
 ; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
 ; AVX512VL-NEXT:    retq
