[llvm] 7fef15e - Revert rG6aa7cc037f2f95c237c1d82c523f8857fa3a10c3 - "[X86] LowerVectorAllZero - add 512-bit support with AVX512 vptestnmd+kortestw patterns"

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 23 05:37:29 PDT 2023


Author: Simon Pilgrim
Date: 2023-03-23T12:37:19Z
New Revision: 7fef15edd4d35d5f2dcaa8cd32d5c8add028dc67

URL: https://github.com/llvm/llvm-project/commit/7fef15edd4d35d5f2dcaa8cd32d5c8add028dc67
DIFF: https://github.com/llvm/llvm-project/commit/7fef15edd4d35d5f2dcaa8cd32d5c8add028dc67.diff

LOG: Revert rG6aa7cc037f2f95c237c1d82c523f8857fa3a10c3 - "[X86] LowerVectorAllZero - add 512-bit support with AVX512 vptestnmd+kortestw patterns"

Reverted - I need to adjust the implementation so that it can be properly refactored into a "LowerVectorAllEqual" function.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/ptest.ll
    llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e006388b6e928..e828fe4b9dd15 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24192,23 +24192,14 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
                        DAG.getConstant(0, DL, IntVT));
   }
 
-  // Split down to 128/256/512-bit vector.
-  unsigned TestSize =
-      Subtarget.useAVX512Regs() ? 512 : (Subtarget.hasAVX() ? 256 : 128);
+  // Split down to 128/256-bit vector.
+  unsigned TestSize = Subtarget.hasAVX() ? 256 : 128;
   while (VT.getSizeInBits() > TestSize) {
     auto Split = DAG.SplitVector(V, DL);
     VT = Split.first.getValueType();
     V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second);
   }
 
-  bool UseKORTEST = Subtarget.useAVX512Regs();
-  if (UseKORTEST && VT.is512BitVector()) {
-    V = DAG.getBitcast(MVT::v16i32, MaskBits(V));
-    V = DAG.getSetCC(DL, MVT::v16i1, V,
-                     getZeroVector(MVT::v16i32, Subtarget, DAG, DL), ISD::SETEQ);
-    return DAG.getNode(X86ISD::KORTEST, DL, MVT::i32, V, V);
-  }
-
   bool UsePTEST = Subtarget.hasSSE41();
   if (UsePTEST) {
     MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;

diff  --git a/llvm/test/CodeGen/X86/ptest.ll b/llvm/test/CodeGen/X86/ptest.ll
index c417c5d15b874..066cbb6193317 100644
--- a/llvm/test/CodeGen/X86/ptest.ll
+++ b/llvm/test/CodeGen/X86/ptest.ll
@@ -148,8 +148,9 @@ define i32 @veccond512(<16 x i32> %input) {
 ;
 ; AVX512-LABEL: veccond512:
 ; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    je .LBB2_2
 ; AVX512-NEXT:  # %bb.1: # %if-true-block
 ; AVX512-NEXT:    xorl %eax, %eax
@@ -267,9 +268,10 @@ define i32 @vectest512(<16 x i32> %input) {
 ;
 ; AVX512-LABEL: vectest512:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -378,8 +380,9 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
 ; AVX512-LABEL: vecsel512:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    movl %edi, %eax
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    cmovel %esi, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index 5d921c0aa2c62..fcb0ab6090398 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -105,8 +105,9 @@ define i1 @test_v8i64(<8 x i64> %a0) {
 ;
 ; AVX512-LABEL: test_v8i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -168,8 +169,9 @@ define i1 @test_v16i64(<16 x i64> %a0) {
 ; AVX512-LABEL: test_v16i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -296,8 +298,9 @@ define i1 @test_v16i32(<16 x i32> %a0) {
 ;
 ; AVX512-LABEL: test_v16i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -359,8 +362,9 @@ define i1 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-LABEL: test_v32i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpord %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -506,8 +510,9 @@ define i1 @test_v32i16(<32 x i16> %a0) {
 ;
 ; AVX512-LABEL: test_v32i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -569,8 +574,9 @@ define i1 @test_v64i16(<64 x i16> %a0) {
 ; AVX512-LABEL: test_v64i16:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -735,8 +741,9 @@ define i1 @test_v64i8(<64 x i8> %a0) {
 ;
 ; AVX512-LABEL: test_v64i8:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -798,8 +805,9 @@ define i1 @test_v128i8(<128 x i8> %a0) {
 ; AVX512-LABEL: test_v128i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vptest %ymm0, %ymm0
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1006,8 +1014,10 @@ define i1 @mask_v128i8(<128 x i8> %a0) {
 ; AVX512-LABEL: mask_v128i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
+; AVX512-NEXT:    vptest %ymm1, %ymm0
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq


        


More information about the llvm-commits mailing list