[llvm] 1f930cf - [X86] Fold not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2) (REAPPLIED)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 26 07:14:23 PST 2024
Author: Simon Pilgrim
Date: 2024-01-26T15:13:59Z
New Revision: 1f930cf894ccb086ad77ca70b48edb1b4ed092c4
URL: https://github.com/llvm/llvm-project/commit/1f930cf894ccb086ad77ca70b48edb1b4ed092c4
DIFF: https://github.com/llvm/llvm-project/commit/1f930cf894ccb086ad77ca70b48edb1b4ed092c4.diff
LOG: [X86] Fold not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2) (REAPPLIED)
Reapply b9483d30a7d7a0650a0e83c75fcb9ab4932f475a with a fix: the original commit contained a typo and did not ensure that the comparison (icmp) was against zero.
Fixes #78888
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/icmp-pow2-mask.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c17ca55eda910d1..27b6cf35ea76863 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49336,6 +49336,27 @@ static SDValue combineOrXorWithSETCC(SDNode *N, SDValue N0, SDValue N1,
}
}
+ // not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2)
+ if (N->getOpcode() == ISD::XOR && N0.getOpcode() == X86ISD::PCMPEQ &&
+ N0.getOperand(0).getOpcode() == ISD::AND &&
+ ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()) &&
+ ISD::isBuildVectorAllOnes(N1.getNode())) {
+ MVT VT = N->getSimpleValueType(0);
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (getTargetConstantBitsFromNode(N0.getOperand(0).getOperand(1),
+ VT.getScalarSizeInBits(), UndefElts,
+ EltBits)) {
+ bool IsPow2OrUndef = true;
+ for (unsigned I = 0, E = EltBits.size(); I != E; ++I)
+ IsPow2OrUndef &= UndefElts[I] || EltBits[I].isPowerOf2();
+
+ if (IsPow2OrUndef)
+ return DAG.getNode(X86ISD::PCMPEQ, SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(0).getOperand(1));
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/icmp-pow2-mask.ll b/llvm/test/CodeGen/X86/icmp-pow2-mask.ll
index 5eeb45607c8c873..6d2866f50c6c7c1 100644
--- a/llvm/test/CodeGen/X86/icmp-pow2-mask.ll
+++ b/llvm/test/CodeGen/X86/icmp-pow2-mask.ll
@@ -11,11 +11,9 @@ define <8 x i16> @pow2_mask_v16i8(i8 zeroext %0) {
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [128,128,64,64,32,32,16,16,8,8,4,4,2,2,1,1]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: pow2_mask_v16i8:
@@ -23,11 +21,9 @@ define <8 x i16> @pow2_mask_v16i8(i8 zeroext %0) {
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,0,0,0,0,0,0,0,0]
+; SSE41-NEXT: pand %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqb %xmm1, %xmm0
; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
; SSE41-NEXT: retq
;
@@ -35,11 +31,9 @@ define <8 x i16> @pow2_mask_v16i8(i8 zeroext %0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT: retq
;
@@ -103,11 +97,9 @@ define i64 @pow2_mask_v8i8(i8 zeroext %0) {
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: pcmpeqb %xmm0, %xmm1
-; SSE-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: movq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,0,0,0,0,0,0,0,0]
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: retq
;
@@ -115,21 +107,18 @@ define i64 @pow2_mask_v8i8(i8 zeroext %0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: retq
;
; AVX512-LABEL: pow2_mask_v8i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastb %edi, %xmm0
-; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
%vec = insertelement <1 x i8> poison, i8 %0, i64 0
More information about the llvm-commits mailing list