[llvm] b9483d3 - Revert "[X86] Fold not(pcmpeq(and(X, CstPow2), 0)) -> pcmpeq(and(X,CstPow2),CstPow2)"

Douglas Yung via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 25 09:40:39 PST 2024


Author: Douglas Yung
Date: 2024-01-25T09:40:16-08:00
New Revision: b9483d30a7d7a0650a0e83c75fcb9ab4932f475a

URL: https://github.com/llvm/llvm-project/commit/b9483d30a7d7a0650a0e83c75fcb9ab4932f475a
DIFF: https://github.com/llvm/llvm-project/commit/b9483d30a7d7a0650a0e83c75fcb9ab4932f475a.diff

LOG: Revert "[X86] Fold not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2)"

This reverts commit 72f10f7eb536da58cb79e13974895cd97d4e1a5f.

This change was causing a miscompile on an internal test and is being reverted at the author's request until it can be fixed.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/icmp-pow2-mask.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5cc2803b2808797..86de5dd0c3f0974 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49328,26 +49328,6 @@ static SDValue combineOrXorWithSETCC(SDNode *N, SDValue N0, SDValue N1,
     }
   }
 
-  // not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2)
-  if (N->getOpcode() == ISD::XOR && N0.getOpcode() == X86ISD::PCMPEQ &&
-      N0.getOperand(0).getOpcode() == ISD::AND &&
-      ISD::isBuildVectorAllOnes(N1.getNode())) {
-    MVT VT = N->getSimpleValueType(0);
-    APInt UndefElts;
-    SmallVector<APInt> EltBits;
-    if (getTargetConstantBitsFromNode(N0.getOperand(0).getOperand(1),
-                                      VT.getScalarSizeInBits(), UndefElts,
-                                      EltBits)) {
-      bool IsPow2OrUndef = true;
-      for (unsigned I = 0, E = EltBits.size(); I != E; ++I)
-        IsPow2OrUndef &= UndefElts[I] || EltBits[I].isPowerOf2();
-
-      if (IsPow2OrUndef)
-        return DAG.getNode(X86ISD::PCMPEQ, SDLoc(N), VT, N0.getOperand(0),
-                           N0.getOperand(0).getOperand(1));
-    }
-  }
-
   return SDValue();
 }
 

diff  --git a/llvm/test/CodeGen/X86/icmp-pow2-mask.ll b/llvm/test/CodeGen/X86/icmp-pow2-mask.ll
index 6d2866f50c6c7c1..5eeb45607c8c873 100644
--- a/llvm/test/CodeGen/X86/icmp-pow2-mask.ll
+++ b/llvm/test/CodeGen/X86/icmp-pow2-mask.ll
@@ -11,9 +11,11 @@ define <8 x i16> @pow2_mask_v16i8(i8 zeroext %0) {
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,64,64,32,32,16,16,8,8,4,4,2,2,1,1]
-; SSE2-NEXT:    pand %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: pow2_mask_v16i8:
@@ -21,9 +23,11 @@ define <8 x i16> @pow2_mask_v16i8(i8 zeroext %0) {
 ; SSE41-NEXT:    movd %edi, %xmm0
 ; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE41-NEXT:    movq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,0,0,0,0,0,0,0,0]
-; SSE41-NEXT:    pand %xmm1, %xmm0
-; SSE41-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
 ; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
 ; SSE41-NEXT:    retq
 ;
@@ -31,9 +35,11 @@ define <8 x i16> @pow2_mask_v16i8(i8 zeroext %0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovd %edi, %xmm0
 ; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
-; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
-; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -97,9 +103,11 @@ define i64 @pow2_mask_v8i8(i8 zeroext %0) {
 ; SSE-NEXT:    movd %edi, %xmm0
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE-NEXT:    movq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,0,0,0,0,0,0,0,0]
-; SSE-NEXT:    pand %xmm1, %xmm0
-; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    retq
 ;
@@ -107,18 +115,21 @@ define i64 @pow2_mask_v8i8(i8 zeroext %0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovd %edi, %xmm0
 ; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
-; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
-; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovq %xmm0, %rax
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: pow2_mask_v8i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpbroadcastb %edi, %xmm0
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    retq
   %vec = insertelement <1 x i8> poison, i8 %0, i64 0


        


More information about the llvm-commits mailing list