[llvm] 3d00e1e - [X86] LowerSELECTWithCmpZero - fold "SELECT (AND(X,1) == 0), 0, -1 -> NEG(AND(X,1))"
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 8 02:40:29 PDT 2024
Author: Simon Pilgrim
Date: 2024-09-08T10:08:50+01:00
New Revision: 3d00e1e208413738fc978d0a4b4ff853c0413618
URL: https://github.com/llvm/llvm-project/commit/3d00e1e208413738fc978d0a4b4ff853c0413618
DIFF: https://github.com/llvm/llvm-project/commit/3d00e1e208413738fc978d0a4b4ff853c0413618.diff
LOG: [X86] LowerSELECTWithCmpZero - fold "SELECT (AND(X,1) == 0), 0, -1 -> NEG(AND(X,1))"
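
The identity behind the fold: when the low bit of X is clear both sides are 0,
and when it is set, negating the masked bit gives all ones, so the select needs
no CMOV or branch. A minimal C++ sketch of the equivalence (illustrative only;
the helper names are hypothetical, not part of the patch):

#include <cassert>
#include <cstdint>

// "SELECT (AND(X,1) == 0), 0, -1" written out in scalar form.
uint32_t selectOnLSB(uint32_t x) { return (x & 1) == 0 ? 0u : ~0u; }

// "NEG(AND(X,1))": unsigned negation of the masked bit yields 0 or all ones.
uint32_t splatLSB(uint32_t x) { return 0u - (x & 1); }

int main() {
  for (uint32_t x : {0u, 1u, 2u, 3u, 0x80000001u, ~0u})
    assert(selectOnLSB(x) == splatLSB(x));
  return 0;
}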
Added:

Modified:
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx512-calling-conv.ll
    llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
    llvm/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/test/CodeGen/X86/pr43507.ll
    llvm/test/CodeGen/X86/setcc-lowering.ll
    llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
    llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll

Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3b90ab3acddbd6..b8a6f10cab623d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24086,6 +24086,23 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
if (X86CC == X86::COND_E && CmpVal.getOpcode() == ISD::AND &&
isOneConstant(CmpVal.getOperand(1))) {
+ auto SplatLSB = [&]() {
+      // We need a mask of all zeros or all ones with the same size as the
+      // other operands.
+ SDValue Neg = CmpVal;
+ if (CmpVT.bitsGT(VT))
+ Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpVal);
+ else if (CmpVT.bitsLT(VT))
+ Neg = DAG.getNode(
+ ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpVal.getOperand(0)),
+ DAG.getConstant(1, DL, VT));
+ return DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
+ };
+
+ // SELECT (AND(X,1) == 0), 0, -1 -> NEG(AND(X,1))
+ if (isNullConstant(LHS) && isAllOnesConstant(RHS))
+ return SplatLSB();
SDValue Src1, Src2;
auto isIdentityPattern = [&]() {
@@ -24116,17 +24133,7 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
// SELECT (AND(X,1) == 0), Y, (ADD Y, Z) -> (ADD Y, (AND NEG(AND(X,1)), Z))
// SELECT (AND(X,1) == 0), Y, (SUB Y, Z) -> (SUB Y, (AND NEG(AND(X,1)), Z))
if (!Subtarget.canUseCMOV() && isIdentityPattern()) {
- // we need mask of all zeros or ones with same size of the other
- // operands.
- SDValue Neg = CmpVal;
- if (CmpVT.bitsGT(VT))
- Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpVal);
- else if (CmpVT.bitsLT(VT))
- Neg = DAG.getNode(
- ISD::AND, DL, VT,
- DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpVal.getOperand(0)),
- DAG.getConstant(1, DL, VT));
- SDValue Mask = DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
+ SDValue Mask = SplatLSB();
SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
return DAG.getNode(RHS.getOpcode(), DL, VT, Src2, And); // y Op And
}
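
The SplatLSB helper also reconciles type widths: the compared value may be
wider or narrower than the select's result type, so it is truncated, or
any-extended and re-masked (an any-extend leaves the upper bits undefined),
before the negation. The same width handling in scalar C++ terms (a sketch
with hypothetical helpers, not the LLVM API):

#include <cstdint>

// CmpVT wider than VT: truncation preserves the low bit (ISD::TRUNCATE).
uint8_t splatLSBTruncating(uint32_t cmpVal) {
  uint8_t narrowed = static_cast<uint8_t>(cmpVal);
  return 0u - (narrowed & 1u); // negate -> 0x00 or 0xFF
}

// CmpVT narrower than VT: extend first, then re-mask the low bit, because an
// any-extend makes no promise about the upper bits (ISD::ANY_EXTEND + AND).
uint32_t splatLSBExtending(uint8_t x) {
  uint32_t extended = static_cast<uint32_t>(x) & 1u;
  return 0u - extended; // 0 or 0xFFFFFFFF
}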
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index b39b089faa2a5e..c27cced9d5ffa7 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -679,13 +679,6 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: pushq %r13
; KNL-NEXT: pushq %r12
; KNL-NEXT: pushq %rbx
-; KNL-NEXT: xorl %r10d, %r10d
-; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp)
-; KNL-NEXT: movl $65535, %eax ## imm = 0xFFFF
-; KNL-NEXT: movl $0, %r11d
-; KNL-NEXT: cmovnel %eax, %r11d
-; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp)
-; KNL-NEXT: cmovnel %eax, %r10d
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: andl $1, %edi
@@ -905,10 +898,10 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k0
-; KNL-NEXT: kmovw %r11d, %k1
-; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
-; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: kmovw %r10d, %k2
+; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
+; KNL-NEXT: kandw %k1, %k0, %k0
+; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: kandw %k1, %k2, %k1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftrw $1, %k0, %k1
@@ -1316,240 +1309,233 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL_X32-NEXT: pushl %edi
; KNL_X32-NEXT: pushl %esi
; KNL_X32-NEXT: subl $16, %esp
-; KNL_X32-NEXT: xorl %eax, %eax
-; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
-; KNL_X32-NEXT: movl $65535, %edx ## imm = 0xFFFF
-; KNL_X32-NEXT: movl $0, %ecx
-; KNL_X32-NEXT: cmovnel %edx, %ecx
-; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
-; KNL_X32-NEXT: cmovnel %edx, %eax
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: andl $1, %edx
-; KNL_X32-NEXT: kmovw %edx, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: andl $1, %eax
+; KNL_X32-NEXT: kmovw %eax, %k0
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $14, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-5, %dx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movw $-5, %ax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $13, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-9, %dx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movw $-9, %ax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $12, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-17, %dx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movw $-17, %ax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $11, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-33, %dx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movw $-33, %ax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $10, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-65, %dx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movw $-65, %ax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $9, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-129, %dx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movw $-129, %ax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $8, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-257, %dx ## imm = 0xFEFF
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movw $-257, %ax ## imm = 0xFEFF
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $7, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-513, %dx ## imm = 0xFDFF
-; KNL_X32-NEXT: kmovw %edx, %k7
+; KNL_X32-NEXT: movw $-513, %ax ## imm = 0xFDFF
+; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kandw %k7, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $6, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-1025, %dx ## imm = 0xFBFF
-; KNL_X32-NEXT: kmovw %edx, %k4
+; KNL_X32-NEXT: movw $-1025, %ax ## imm = 0xFBFF
+; KNL_X32-NEXT: kmovw %eax, %k4
; KNL_X32-NEXT: kandw %k4, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $5, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-2049, %dx ## imm = 0xF7FF
-; KNL_X32-NEXT: kmovw %edx, %k3
+; KNL_X32-NEXT: movw $-2049, %ax ## imm = 0xF7FF
+; KNL_X32-NEXT: kmovw %eax, %k3
; KNL_X32-NEXT: kandw %k3, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $4, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-4097, %dx ## imm = 0xEFFF
-; KNL_X32-NEXT: kmovw %edx, %k2
+; KNL_X32-NEXT: movw $-4097, %ax ## imm = 0xEFFF
+; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $3, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: movw $-8193, %dx ## imm = 0xDFFF
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movw $-8193, %ax ## imm = 0xDFFF
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k5
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $2, %k5, %k5
; KNL_X32-NEXT: korw %k5, %k0, %k5
-; KNL_X32-NEXT: movw $-16385, %dx ## imm = 0xBFFF
-; KNL_X32-NEXT: kmovw %edx, %k0
+; KNL_X32-NEXT: movw $-16385, %ax ## imm = 0xBFFF
+; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kandw %k0, %k5, %k5
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $14, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kshiftlw $1, %k5, %k5
; KNL_X32-NEXT: kshiftrw $1, %k5, %k5
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw %k5, (%esp) ## 2-byte Spill
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: andl $1, %edx
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
-; KNL_X32-NEXT: kmovw %ebx, %k5
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: andl $1, %eax
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; KNL_X32-NEXT: kmovw %ecx, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $14, %k5, %k5
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: korw %k5, %k6, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $13, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $12, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $11, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $10, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $9, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $8, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $7, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kandw %k7, %k5, %k5
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k6
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $6, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kandw %k4, %k5, %k4
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k5
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $5, %k5, %k5
; KNL_X32-NEXT: korw %k5, %k4, %k4
; KNL_X32-NEXT: kandw %k3, %k4, %k3
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k4
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k4
; KNL_X32-NEXT: kshiftlw $15, %k4, %k4
; KNL_X32-NEXT: kshiftrw $4, %k4, %k4
; KNL_X32-NEXT: korw %k4, %k3, %k3
; KNL_X32-NEXT: kandw %k2, %k3, %k2
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k3
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k3
; KNL_X32-NEXT: kshiftlw $15, %k3, %k3
; KNL_X32-NEXT: kshiftrw $3, %k3, %k3
; KNL_X32-NEXT: korw %k3, %k2, %k2
; KNL_X32-NEXT: kandw %k1, %k2, %k1
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k2
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $2, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: kandw %k0, %k1, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $14, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $1, %k0, %k0
; KNL_X32-NEXT: kshiftrw $1, %k0, %k0
-; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; KNL_X32-NEXT: kmovw %edx, %k1
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
-; KNL_X32-NEXT: kmovw %ecx, %k1
-; KNL_X32-NEXT: kmovw (%esp), %k2 ## 2-byte Reload
-; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: kmovw %eax, %k2
+; KNL_X32-NEXT: kmovw (%esp), %k1 ## 2-byte Reload
+; KNL_X32-NEXT: kandw %k1, %k0, %k0
+; KNL_X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; KNL_X32-NEXT: kmovw {{[0-9]+}}(%esp), %k2
; KNL_X32-NEXT: kandw %k1, %k2, %k1
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %k1, %ebx
diff --git a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
index e43b2f4b4abc46..6c661eb771d1bd 100644
--- a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
@@ -697,12 +697,10 @@ define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) {
; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: movzbl (%rdi), %eax
-; AVX512-NEXT: shrb %al
; AVX512-NEXT: xorl %ecx, %ecx
-; AVX512-NEXT: testb $1, %al
-; AVX512-NEXT: movl $255, %eax
-; AVX512-NEXT: cmovel %ecx, %eax
-; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: btl $1, %eax
+; AVX512-NEXT: sbbl %ecx, %ecx
+; AVX512-NEXT: kmovd %ecx, %k0
; AVX512-NEXT: kshiftrb $1, %k0, %k0
; AVX512-NEXT: kshiftlb $7, %k0, %k0
; AVX512-NEXT: kshiftrb $7, %k0, %k0
@@ -712,12 +710,10 @@ define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) {
; AVX512NOTDQ-LABEL: load_v3i1_broadcast_1_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
-; AVX512NOTDQ-NEXT: shrb %al
; AVX512NOTDQ-NEXT: xorl %ecx, %ecx
-; AVX512NOTDQ-NEXT: testb $1, %al
-; AVX512NOTDQ-NEXT: movl $255, %eax
-; AVX512NOTDQ-NEXT: cmovel %ecx, %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k0
+; AVX512NOTDQ-NEXT: btl $1, %eax
+; AVX512NOTDQ-NEXT: sbbl %ecx, %ecx
+; AVX512NOTDQ-NEXT: kmovd %ecx, %k0
; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
@@ -732,10 +728,10 @@ define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) {
define void @load_v3i1_broadcast_2_v1i1_store(ptr %a0,ptr %a1) {
; AVX512-LABEL: load_v3i1_broadcast_2_v1i1_store:
; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: testb $4, (%rdi)
-; AVX512-NEXT: movl $255, %ecx
-; AVX512-NEXT: cmovel %eax, %ecx
+; AVX512-NEXT: movzbl (%rdi), %eax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: btl $2, %eax
+; AVX512-NEXT: sbbl %ecx, %ecx
; AVX512-NEXT: kmovd %ecx, %k0
; AVX512-NEXT: kshiftrb $2, %k0, %k0
; AVX512-NEXT: kshiftlb $7, %k0, %k0
@@ -745,10 +741,10 @@ define void @load_v3i1_broadcast_2_v1i1_store(ptr %a0,ptr %a1) {
;
; AVX512NOTDQ-LABEL: load_v3i1_broadcast_2_v1i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: xorl %eax, %eax
-; AVX512NOTDQ-NEXT: testb $4, (%rdi)
-; AVX512NOTDQ-NEXT: movl $255, %ecx
-; AVX512NOTDQ-NEXT: cmovel %eax, %ecx
+; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
+; AVX512NOTDQ-NEXT: xorl %ecx, %ecx
+; AVX512NOTDQ-NEXT: btl $2, %eax
+; AVX512NOTDQ-NEXT: sbbl %ecx, %ecx
; AVX512NOTDQ-NEXT: kmovd %ecx, %k0
; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
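
In the hunks above the old test/cmov sequence becomes bt plus sbb: bt copies
the tested bit into the carry flag, and subtracting a register from itself
with borrow leaves -CF, i.e. 0 or all ones. The flag arithmetic in C++ terms
(a sketch, not generated code):

#include <cstdint>

// btl $n, %eax sets CF to bit n of eax; sbbl %ecx, %ecx then computes
// ecx - ecx - CF = -CF, materializing 0 or 0xFFFFFFFF without a cmov.
uint32_t broadcastBit(uint32_t x, unsigned n) {
  uint32_t cf = (x >> n) & 1u; // carry flag after the bt
  return 0u - cf;              // result of sbb reg, reg
}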
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 2a77d0238721c0..f2a197cca8ae5b 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -298,10 +298,8 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
define i16 @test15(ptr%addr) nounwind {
; CHECK-LABEL: test15:
; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: cmpb $0, (%rdi)
-; CHECK-NEXT: movl $65535, %eax ## imm = 0xFFFF
-; CHECK-NEXT: cmovel %ecx, %eax
+; CHECK-NEXT: movzbl (%rdi), %eax
+; CHECK-NEXT: negl %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%x = load i1 , ptr %addr, align 1
diff --git a/llvm/test/CodeGen/X86/pr43507.ll b/llvm/test/CodeGen/X86/pr43507.ll
index ec18d3c13ba816..24c27fbd7f8d3e 100644
--- a/llvm/test/CodeGen/X86/pr43507.ll
+++ b/llvm/test/CodeGen/X86/pr43507.ll
@@ -4,11 +4,9 @@
define <8 x i1> @ham(i64 %arg) {
; CHECK-LABEL: ham:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: movl $255, %ecx
-; CHECK-NEXT: cmovel %eax, %ecx
-; CHECK-NEXT: kmovd %ecx, %k0
+; CHECK-NEXT: andb $1, %dil
+; CHECK-NEXT: negb %dil
+; CHECK-NEXT: kmovd %edi, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: retq
%tmp = trunc i64 %arg to i1
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
index aa4fbb469c14fd..5bbc73eacd968e 100644
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -88,27 +88,25 @@ define void @pr26232(i64 %a, <16 x i1> %b) nounwind {
;
; KNL-32-LABEL: pr26232:
; KNL-32: # %bb.0: # %allocas
-; KNL-32-NEXT: pushl %esi
; KNL-32-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-32-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-32-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; KNL-32-NEXT: movl $65535, %edx # imm = 0xFFFF
; KNL-32-NEXT: .p2align 4, 0x90
; KNL-32-NEXT: .LBB1_1: # %for_loop599
; KNL-32-NEXT: # =>This Inner Loop Header: Depth=1
; KNL-32-NEXT: cmpl $65536, %ecx # imm = 0x10000
-; KNL-32-NEXT: movl %eax, %esi
-; KNL-32-NEXT: sbbl $0, %esi
-; KNL-32-NEXT: movl $0, %esi
-; KNL-32-NEXT: cmovll %edx, %esi
-; KNL-32-NEXT: kmovw %esi, %k1
+; KNL-32-NEXT: movl %eax, %edx
+; KNL-32-NEXT: sbbl $0, %edx
+; KNL-32-NEXT: setl %dl
+; KNL-32-NEXT: movzbl %dl, %edx
+; KNL-32-NEXT: negl %edx
+; KNL-32-NEXT: kmovw %edx, %k1
; KNL-32-NEXT: kandw %k0, %k1, %k1
; KNL-32-NEXT: kortestw %k1, %k1
; KNL-32-NEXT: jne .LBB1_1
; KNL-32-NEXT: # %bb.2: # %for_exit600
-; KNL-32-NEXT: popl %esi
; KNL-32-NEXT: retl
allocas:
br label %for_test11.preheader
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index 24113441a4e25a..c71a96f704ac38 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -415,11 +415,10 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm2
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3
-; AVX512F-NEXT: xorl %eax, %eax
; AVX512F-NEXT: vucomiss %xmm3, %xmm2
-; AVX512F-NEXT: movl $255, %ecx
-; AVX512F-NEXT: cmovbel %eax, %ecx
-; AVX512F-NEXT: kmovd %ecx, %k1
+; AVX512F-NEXT: seta %al
+; AVX512F-NEXT: negb %al
+; AVX512F-NEXT: kmovd %eax, %k1
; AVX512F-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vmovdqa %xmm1, %xmm0
; AVX512F-NEXT: vzeroupper
@@ -430,11 +429,10 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm2
; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm3
-; AVX512VL-NEXT: xorl %eax, %eax
; AVX512VL-NEXT: vucomiss %xmm3, %xmm2
-; AVX512VL-NEXT: movl $255, %ecx
-; AVX512VL-NEXT: cmovbel %eax, %ecx
-; AVX512VL-NEXT: kmovd %ecx, %k1
+; AVX512VL-NEXT: seta %al
+; AVX512VL-NEXT: negb %al
+; AVX512VL-NEXT: kmovd %eax, %k1
; AVX512VL-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index edefb16d40e6ed..2dffe2bf0dfa1f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -416,9 +416,8 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3
; AVX512F-NEXT: xorl %eax, %eax
; AVX512F-NEXT: vucomiss %xmm3, %xmm2
-; AVX512F-NEXT: movl $255, %ecx
-; AVX512F-NEXT: cmovael %eax, %ecx
-; AVX512F-NEXT: kmovd %ecx, %k1
+; AVX512F-NEXT: sbbl %eax, %eax
+; AVX512F-NEXT: kmovd %eax, %k1
; AVX512F-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vmovdqa %xmm1, %xmm0
; AVX512F-NEXT: vzeroupper
@@ -431,9 +430,8 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm3
; AVX512VL-NEXT: xorl %eax, %eax
; AVX512VL-NEXT: vucomiss %xmm3, %xmm2
-; AVX512VL-NEXT: movl $255, %ecx
-; AVX512VL-NEXT: cmovael %eax, %ecx
-; AVX512VL-NEXT: kmovd %ecx, %k1
+; AVX512VL-NEXT: sbbl %eax, %eax
+; AVX512VL-NEXT: kmovd %eax, %k1
; AVX512VL-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VL-NEXT: retq