[llvm] c660a2f - [X86] Fold ANDNP(X,NOT(Y)) -> NOT(OR(X,Y))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 13 08:58:34 PDT 2023
Author: Simon Pilgrim
Date: 2023-07-13T16:56:20+01:00
New Revision: c660a2f0ab1297b178fd06853c4991d0f07d8fa0
URL: https://github.com/llvm/llvm-project/commit/c660a2f0ab1297b178fd06853c4991d0f07d8fa0
DIFF: https://github.com/llvm/llvm-project/commit/c660a2f0ab1297b178fd06853c4991d0f07d8fa0.diff
LOG: [X86] Fold ANDNP(X,NOT(Y)) -> NOT(OR(X,Y))
Replacing the x86-specific ANDNP node with generic NOT(OR()) nodes enables further folds and, since OR (unlike ANDNP) is commutative, gives the combiner more freedom to commute operands.
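For reference, the fold is De Morgan's law applied to the ANDNP semantics (ANDNP(X,Y) computes AND(NOT(X),Y)), so ANDNP(X,NOT(Y)) == AND(NOT(X),NOT(Y)) == NOT(OR(X,Y)). A minimal standalone C++ sketch, not part of the patch, that checks the identity on a few sample bit patterns:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// x86 ANDNP semantics: AND of NOT(first operand) with the second operand.
static uint32_t andnp(uint32_t X, uint32_t Y) { return ~X & Y; }

int main() {
  for (uint32_t X : {0x00000000u, 0xFFFFFFFFu, 0x12345678u, 0xDEADBEEFu})
    for (uint32_t Y : {0x00000000u, 0xFFFFFFFFu, 0x0F0F0F0Fu, 0xCAFEBABEu})
      // ANDNP(X, NOT(Y)) -> NOT(OR(X, Y))
      assert(andnp(X, ~Y) == ~(X | Y));
  return 0;
}

Because the resulting NOT(OR()) form uses only generic nodes, later target-independent combines can pick it up, which is what the test diffs below show (e.g. pandn sequences becoming por+pxor, and vpternlogq immediates changing accordingly).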
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/bool-ext-inc.ll
llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
llvm/test/CodeGen/X86/icmp-pow2-diff.ll
llvm/test/CodeGen/X86/mul-cmp.ll
llvm/test/CodeGen/X86/sat-add.ll
llvm/test/CodeGen/X86/setcc-logic.ll
llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
llvm/test/CodeGen/X86/sshl_sat_vec.ll
llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
llvm/test/CodeGen/X86/vsplit-and.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0dd9a0e403d27a..9d3c8e734b5a6f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54364,11 +54364,12 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
MVT VT = N->getSimpleValueType(0);
int NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ SDLoc DL(N);
// ANDNP(undef, x) -> 0
// ANDNP(x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// ANDNP(0, x) -> x
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -54376,21 +54377,27 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
// ANDNP(x, 0) -> 0
if (ISD::isBuildVectorAllZeros(N1.getNode()))
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// ANDNP(x, -1) -> NOT(x) -> XOR(x, -1)
if (ISD::isBuildVectorAllOnes(N1.getNode()))
- return DAG.getNOT(SDLoc(N), N0, VT);
+ return DAG.getNOT(DL, N0, VT);
// Turn ANDNP back to AND if input is inverted.
if (SDValue Not = IsNOT(N0, DAG))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1);
+ return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
+
+  // Fold for better commutativity:
+  // ANDNP(X,NOT(Y)) -> AND(NOT(X),NOT(Y)) -> NOT(OR(X,Y)).
+ if (N1->hasOneUse())
+ if (SDValue Not = IsNOT(N1, DAG))
+ return DAG.getNOT(
+ DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
// Constant Folding
APInt Undefs0, Undefs1;
SmallVector<APInt> EltBits0, EltBits1;
if (getTargetConstantBitsFromNode(N0, EltSizeInBits, Undefs0, EltBits0)) {
- SDLoc DL(N);
if (getTargetConstantBitsFromNode(N1, EltSizeInBits, Undefs1, EltBits1)) {
SmallVector<APInt> ResultBits;
for (int I = 0; I != NumElts; ++I)
diff --git a/llvm/test/CodeGen/X86/bool-ext-inc.ll b/llvm/test/CodeGen/X86/bool-ext-inc.ll
index d89893f94bdaec..088b0ce857f202 100644
--- a/llvm/test/CodeGen/X86/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/X86/bool-ext-inc.ll
@@ -88,11 +88,8 @@ define <4 x i32> @bool_logic_and_math_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32>
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
-; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp1 = icmp ne <4 x i32> %a, %b
%cmp2 = icmp ne <4 x i32> %c, %d
diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
index 6f2be411217b66..bc40a7330fe2a5 100644
--- a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
+++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
@@ -647,8 +647,8 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
@@ -666,9 +666,8 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT: orps %xmm2, %xmm0
; SSE41-NEXT: xorps %xmm3, %xmm0
-; SSE41-NEXT: andnps %xmm0, %xmm2
-; SSE41-NEXT: movaps %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x64:
@@ -689,9 +688,8 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-NEXT: andps %xmm4, %xmm0
+; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: xorps %xmm3, %xmm0
-; SSE2-NEXT: andnps %xmm0, %xmm2
-; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
%cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
@@ -715,8 +713,8 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: ne_and_to_abs_vec4x64_sext:
@@ -731,36 +729,36 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT: orps %xmm2, %xmm0
; SSE41-NEXT: xorps %xmm3, %xmm0
-; SSE41-NEXT: andnps %xmm0, %xmm2
-; SSE41-NEXT: pmovsxdq %xmm2, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
-; SSE41-NEXT: psllq $63, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE41-NEXT: psllq $63, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: psrad $31, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x64_sext:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
-; SSE2-NEXT: andps %xmm4, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
-; SSE2-NEXT: andps %xmm4, %xmm2
-; SSE2-NEXT: xorps %xmm3, %xmm2
-; SSE2-NEXT: andnps %xmm2, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm0
+; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: xorps %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3]
@@ -879,9 +877,8 @@ define <4 x i1> @ne_and_to_abs_vec4x32(<4 x i32> %x) {
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pandn %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -921,9 +918,8 @@ define <4 x i32> @ne_and_to_abs_vec4x32_sext(<4 x i32> %x) {
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pandn %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1112,8 +1108,8 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: ne_and_to_abs_vec4x16_sext:
@@ -1122,9 +1118,8 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
-; SSE41-NEXT: pandn %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x16_sext:
@@ -1133,9 +1128,8 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pandn %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88>
%cmp2 = icmp ne <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88>
diff --git a/llvm/test/CodeGen/X86/icmp-pow2-diff.ll b/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
index f2f12654e6834c..00038e60628133 100644
--- a/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
+++ b/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
@@ -151,7 +151,7 @@ define <8 x i1> @andnot_ne_v8i16_todo_no_splat(<8 x i16> %x) nounwind {
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $18, %xmm1, %xmm2, %xmm0
+; AVX512-NEXT: vpternlogq $54, %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: andnot_ne_v8i16_todo_no_splat:
@@ -159,19 +159,18 @@ define <8 x i1> @andnot_ne_v8i16_todo_no_splat(<8 x i16> %x) nounwind {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; SSE-LABEL: andnot_ne_v8i16_todo_no_splat:
; SSE: # %bb.0:
-; SSE-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: pcmpeqw %xmm2, %xmm1
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: pcmpeqw %xmm1, %xmm2
; SSE-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: retq
%cmp1 = icmp ne <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%cmp2 = icmp ne <8 x i16> %x, <i16 -16385, i16 -257, i16 -33, i16 -8193, i16 -16385, i16 -257, i16 -33, i16 -8193>
@@ -311,7 +310,7 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $18, %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: vpternlogq $86, %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: addand_ne_v8i16_fail:
@@ -319,8 +318,8 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; SSE-LABEL: addand_ne_v8i16_fail:
@@ -329,9 +328,8 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
; SSE-NEXT: pcmpeqw %xmm0, %xmm1
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
%cmp1 = icmp ne <8 x i16> %x, <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>
%cmp2 = icmp ne <8 x i16> %x, <i16 16381, i16 16381, i16 16381, i16 16381, i16 16381, i16 16381, i16 16381, i16 16381>
diff --git a/llvm/test/CodeGen/X86/mul-cmp.ll b/llvm/test/CodeGen/X86/mul-cmp.ll
index 4fffb42bdc6729..0ee4601acf6948 100644
--- a/llvm/test/CodeGen/X86/mul-cmp.ll
+++ b/llvm/test/CodeGen/X86/mul-cmp.ll
@@ -119,21 +119,21 @@ define <4 x i1> @mul_nsw_ne0_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: mul_nsw_ne0_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: pcmpeqd %xmm2, %xmm0
; SSE-NEXT: pcmpeqd %xmm2, %xmm1
-; SSE-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE-NEXT: pxor %xmm1, %xmm2
-; SSE-NEXT: pandn %xmm2, %xmm0
+; SSE-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_nsw_ne0_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%m = mul nsw <4 x i32> %x, %y
%r = icmp ne <4 x i32> %m, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll
index f41d105b6f4f45..aad13f0e829581 100644
--- a/llvm/test/CodeGen/X86/sat-add.ll
+++ b/llvm/test/CodeGen/X86/sat-add.ll
@@ -1008,10 +1008,9 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32>
; SSE2-NEXT: pxor %xmm1, %xmm4
; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm4
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm4, %xmm2
-; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -1152,10 +1151,9 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64>
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm3, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm4
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm4, %xmm2
-; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: paddq %xmm1, %xmm0
; SSE2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll
index 933295d8d1aaa8..3faa493ebccd0d 100644
--- a/llvm/test/CodeGen/X86/setcc-logic.ll
+++ b/llvm/test/CodeGen/X86/setcc-logic.ll
@@ -557,10 +557,9 @@ define <4 x i32> @and_icmps_const_1bit_diff_vec(<4 x i32> %x) {
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [44,60,44,60]
; CHECK-NEXT: pcmpeqd %xmm0, %xmm1
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pxor %xmm0, %xmm2
-; CHECK-NEXT: pandn %xmm2, %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%a = icmp ne <4 x i32> %x, <i32 44, i32 60, i32 44, i32 60>
%b = icmp ne <4 x i32> %x, <i32 60, i32 44, i32 60, i32 44>
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
index 47f7555df17ccf..0d28dfc68f5bf0 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
@@ -2360,20 +2360,20 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX1-NEXT: vpsraw $8, %xmm5, %xmm5
; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5
; CHECK-AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
-; CHECK-AVX1-NEXT: vpackuswb %xmm4, %xmm5, %xmm4
+; CHECK-AVX1-NEXT: vpackuswb %xmm4, %xmm5, %xmm5
; CHECK-AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
-; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; CHECK-AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
-; CHECK-AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm4
-; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
+; CHECK-AVX1-NEXT: vpaddb %xmm3, %xmm5, %xmm5
+; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm6
; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; CHECK-AVX1-NEXT: vpand %xmm3, %xmm6, %xmm6
-; CHECK-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4
-; CHECK-AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
-; CHECK-AVX1-NEXT: vpackuswb %xmm6, %xmm4, %xmm4
-; CHECK-AVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm4
+; CHECK-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
+; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5
+; CHECK-AVX1-NEXT: vpand %xmm3, %xmm5, %xmm5
+; CHECK-AVX1-NEXT: vpackuswb %xmm6, %xmm5, %xmm5
+; CHECK-AVX1-NEXT: vpsubb %xmm5, %xmm0, %xmm5
; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6
@@ -2399,27 +2399,27 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX1-NEXT: vpackuswb %xmm7, %xmm8, %xmm7
; CHECK-AVX1-NEXT: vpsrlw $7, %xmm6, %xmm6
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6
-; CHECK-AVX1-NEXT: vpand %xmm5, %xmm6, %xmm5
-; CHECK-AVX1-NEXT: vpaddb %xmm5, %xmm7, %xmm5
-; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; CHECK-AVX1-NEXT: vpand %xmm4, %xmm6, %xmm4
+; CHECK-AVX1-NEXT: vpaddb %xmm4, %xmm7, %xmm4
+; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6
; CHECK-AVX1-NEXT: vpand %xmm3, %xmm6, %xmm6
-; CHECK-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5
-; CHECK-AVX1-NEXT: vpand %xmm3, %xmm5, %xmm3
+; CHECK-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4
+; CHECK-AVX1-NEXT: vpand %xmm3, %xmm4, %xmm3
; CHECK-AVX1-NEXT: vpackuswb %xmm6, %xmm3, %xmm3
; CHECK-AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm4
-; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
+; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm5, %xmm3
; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; CHECK-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; CHECK-AVX1-NEXT: vandnps %ymm0, %ymm1, %ymm0
+; CHECK-AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; CHECK-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
+; CHECK-AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: pr51133:
@@ -2461,10 +2461,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX2-NEXT: vpackuswb %ymm4, %ymm3, %ymm3
; CHECK-AVX2-NEXT: vpsubb %ymm3, %ymm0, %ymm0
; CHECK-AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; CHECK-AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; CHECK-AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
; CHECK-AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
-; CHECK-AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
+; CHECK-AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; CHECK-AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; CHECK-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: pr51133:
diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
index 531297af2a3094..c9c62343fb61ee 100644
--- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
@@ -37,9 +37,9 @@ define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: pxor %xmm5, %xmm5
; X64-NEXT: pcmpgtd %xmm4, %xmm5
-; X64-NEXT: pcmpeqd %xmm4, %xmm4
-; X64-NEXT: pxor %xmm5, %xmm4
-; X64-NEXT: pandn %xmm4, %xmm2
+; X64-NEXT: por %xmm2, %xmm5
+; X64-NEXT: pcmpeqd %xmm2, %xmm2
+; X64-NEXT: pxor %xmm5, %xmm2
; X64-NEXT: por %xmm0, %xmm2
; X64-NEXT: pandn %xmm2, %xmm1
; X64-NEXT: por %xmm3, %xmm1
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
index 00d122838dbc5d..58fd6492f2ed59 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
@@ -127,21 +127,14 @@ define <4 x i32> @in_constant_varx_mone_invmask(ptr%px, ptr%py, ptr%pmask) {
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0
-; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm2
-; CHECK-SSE2-NEXT: pxor %xmm1, %xmm2
-; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0
-; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-SSE2-NEXT: movaps (%rdi), %xmm0
+; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
-; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
-; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm2
-; CHECK-XOP-NEXT: vpandn %xmm2, %xmm0, %xmm0
-; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; CHECK-XOP-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, ptr%px, align 16
%y = load <4 x i32>, ptr%py, align 16
diff --git a/llvm/test/CodeGen/X86/vsplit-and.ll b/llvm/test/CodeGen/X86/vsplit-and.ll
index 85fc1447b9dea8..04869aa0b9a920 100644
--- a/llvm/test/CodeGen/X86/vsplit-and.ll
+++ b/llvm/test/CodeGen/X86/vsplit-and.ll
@@ -7,9 +7,9 @@ define void @t0(ptr %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
; CHECK-NEXT: pxor %xmm2, %xmm2
; CHECK-NEXT: pcmpeqq %xmm2, %xmm0
; CHECK-NEXT: pcmpeqq %xmm2, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pxor %xmm1, %xmm2
-; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, (%rdi)
; CHECK-NEXT: retq
%cmp1 = icmp ne <2 x i64> %src1, zeroinitializer
@@ -23,31 +23,31 @@ define void @t0(ptr %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
define void @t2(ptr %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
; CHECK-LABEL: t2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq %r9, %xmm0
-; CHECK-NEXT: movq %r8, %xmm1
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: movq %r9, %xmm1
+; CHECK-NEXT: movq %r8, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: movq %rdx, %xmm1
; CHECK-NEXT: movq %rsi, %xmm2
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; CHECK-NEXT: movq %rcx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; CHECK-NEXT: movq %rcx, %xmm1
; CHECK-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
; CHECK-NEXT: pxor %xmm4, %xmm4
-; CHECK-NEXT: pcmpeqq %xmm4, %xmm0
+; CHECK-NEXT: pcmpeqq %xmm4, %xmm1
; CHECK-NEXT: pcmpeqq %xmm4, %xmm2
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pcmpeqq %xmm4, %xmm3
-; CHECK-NEXT: pcmpeqq %xmm4, %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
-; CHECK-NEXT: xorps %xmm0, %xmm1
-; CHECK-NEXT: andnps %xmm1, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
-; CHECK-NEXT: psllq $63, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-NEXT: psrad $31, %xmm0
-; CHECK-NEXT: pmovsxdq %xmm2, %xmm1
-; CHECK-NEXT: movdqa %xmm1, (%rdi)
-; CHECK-NEXT: movq %xmm0, 16(%rdi)
+; CHECK-NEXT: pcmpeqq %xmm4, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
+; CHECK-NEXT: orps %xmm2, %xmm0
+; CHECK-NEXT: xorps %xmm1, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; CHECK-NEXT: psllq $63, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-NEXT: psrad $31, %xmm1
+; CHECK-NEXT: pmovsxdq %xmm0, %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%rdi)
+; CHECK-NEXT: movq %xmm1, 16(%rdi)
; CHECK-NEXT: retq
%cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
%cmp2 = icmp ne <3 x i64> %src2, zeroinitializer