[llvm] r373845 - [X86] Enable AVX512BW for memcmp()

David Zarzycki via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 6 03:25:52 PDT 2019


Author: davezarzycki
Date: Sun Oct  6 03:25:52 2019
New Revision: 373845

URL: http://llvm.org/viewvc/llvm-project?rev=373845&view=rev
Log:
[X86] Enable AVX512BW for memcmp()

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/memcmp.ll
    llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=373845&r1=373844&r2=373845&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Oct  6 03:25:52 2019
@@ -42354,10 +42354,12 @@ static SDValue combineVectorSizedSetCCEq
   if ((OpSize == 128 && Subtarget.hasSSE2()) ||
       (OpSize == 256 && Subtarget.hasAVX2()) ||
       (OpSize == 512 && Subtarget.useAVX512Regs())) {
-    EVT VecVT = OpSize == 512 ? MVT::v16i32 :
+    auto BW = Subtarget.hasBWI();
+    EVT VecVT = OpSize == 512 ? (BW ? MVT::v64i8 : MVT::v16i32) :
                 OpSize == 256 ? MVT::v32i8 :
                                 MVT::v16i8;
-    EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT;
+    EVT CmpVT = OpSize == 512 ? (BW ? MVT::v64i1 : MVT::v16i1) : VecVT;
+
     SDValue Cmp;
     if (IsOrXorXorCCZero) {
       // This is a bitwise-combined equality comparison of 2 pairs of vectors:
@@ -42377,6 +42379,9 @@ static SDValue combineVectorSizedSetCCEq
       Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
     }
     // For 512-bits we want to emit a setcc that will lower to kortest.
+    if (OpSize == 512 && BW)
+      return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i64, Cmp),
+                          DAG.getConstant(0xFFFFFFFFFFFFFFFF, DL, MVT::i64), CC);
     if (OpSize == 512)
       return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp),
                           DAG.getConstant(0xFFFF, DL, MVT::i16), CC);

Modified: llvm/trunk/test/CodeGen/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp.ll?rev=373845&r1=373844&r2=373845&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcmp.ll Sun Oct  6 03:25:52 2019
@@ -6,7 +6,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx  | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F --check-prefix=X64-AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512BW
 
 ; This tests codegen time inlining/optimization of memcmp
 ; rdar://6480398
@@ -1551,6 +1551,15 @@ define i1 @length64_eq(i8* %x, i8* %y) n
 ; X64-AVX512F-NEXT:    setae %al
 ; X64-AVX512F-NEXT:    vzeroupper
 ; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: length64_eq:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT:    vpcmpeqb (%rsi), %zmm0, %k0
+; X64-AVX512BW-NEXT:    kortestq %k0, %k0
+; X64-AVX512BW-NEXT:    setae %al
+; X64-AVX512BW-NEXT:    vzeroupper
+; X64-AVX512BW-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
   %cmp = icmp ne i32 %call, 0
   ret i1 %cmp
@@ -1612,6 +1621,15 @@ define i1 @length64_eq_const(i8* %X) nou
 ; X64-AVX512F-NEXT:    setb %al
 ; X64-AVX512F-NEXT:    vzeroupper
 ; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: length64_eq_const:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT:    vpcmpeqb {{.*}}(%rip), %zmm0, %k0
+; X64-AVX512BW-NEXT:    kortestq %k0, %k0
+; X64-AVX512BW-NEXT:    setb %al
+; X64-AVX512BW-NEXT:    vzeroupper
+; X64-AVX512BW-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
   %c = icmp eq i32 %m, 0
   ret i1 %c

Modified: llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll?rev=373845&r1=373844&r2=373845&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll (original)
+++ llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll Sun Oct  6 03:25:52 2019
@@ -319,14 +319,23 @@ define i32 @ne_i512(<8 x i64> %x, <8 x i
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: ne_i512:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    setae %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: ne_i512:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    xorl %eax, %eax
+; AVX512F-NEXT:    kortestw %k0, %k0
+; AVX512F-NEXT:    setae %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: ne_i512:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    xorl %eax, %eax
+; AVX512BW-NEXT:    kortestq %k0, %k0
+; AVX512BW-NEXT:    setae %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
   %bcx = bitcast <8 x i64> %x to i512
   %bcy = bitcast <8 x i64> %y to i512
   %cmp = icmp ne i512 %bcx, %bcy
@@ -464,14 +473,23 @@ define i32 @eq_i512(<8 x i64> %x, <8 x i
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: eq_i512:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    setb %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: eq_i512:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    xorl %eax, %eax
+; AVX512F-NEXT:    kortestw %k0, %k0
+; AVX512F-NEXT:    setb %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: eq_i512:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    xorl %eax, %eax
+; AVX512BW-NEXT:    kortestq %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
   %bcx = bitcast <8 x i64> %x to i512
   %bcy = bitcast <8 x i64> %y to i512
   %cmp = icmp eq i512 %bcx, %bcy
@@ -804,17 +822,29 @@ define i32 @ne_i512_pair(i512* %a, i512*
 ; NO512-NEXT:    setne %al
 ; NO512-NEXT:    retq
 ;
-; AVX512-LABEL: ne_i512_pair:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
-; AVX512-NEXT:    vmovdqu64 64(%rdi), %zmm1
-; AVX512-NEXT:    vpcmpeqd (%rsi), %zmm0, %k1
-; AVX512-NEXT:    vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    setae %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: ne_i512_pair:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; AVX512F-NEXT:    vmovdqu64 64(%rdi), %zmm1
+; AVX512F-NEXT:    vpcmpeqd (%rsi), %zmm0, %k1
+; AVX512F-NEXT:    vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512F-NEXT:    xorl %eax, %eax
+; AVX512F-NEXT:    kortestw %k0, %k0
+; AVX512F-NEXT:    setae %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: ne_i512_pair:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
+; AVX512BW-NEXT:    vmovdqu64 64(%rdi), %zmm1
+; AVX512BW-NEXT:    vpcmpeqb (%rsi), %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512BW-NEXT:    xorl %eax, %eax
+; AVX512BW-NEXT:    kortestq %k0, %k0
+; AVX512BW-NEXT:    setae %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
   %a0 = load i512, i512* %a
   %b0 = load i512, i512* %b
   %xor1 = xor i512 %a0, %b0
@@ -886,17 +916,29 @@ define i32 @eq_i512_pair(i512* %a, i512*
 ; NO512-NEXT:    sete %al
 ; NO512-NEXT:    retq
 ;
-; AVX512-LABEL: eq_i512_pair:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
-; AVX512-NEXT:    vmovdqu64 64(%rdi), %zmm1
-; AVX512-NEXT:    vpcmpeqd (%rsi), %zmm0, %k1
-; AVX512-NEXT:    vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    setb %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: eq_i512_pair:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; AVX512F-NEXT:    vmovdqu64 64(%rdi), %zmm1
+; AVX512F-NEXT:    vpcmpeqd (%rsi), %zmm0, %k1
+; AVX512F-NEXT:    vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512F-NEXT:    xorl %eax, %eax
+; AVX512F-NEXT:    kortestw %k0, %k0
+; AVX512F-NEXT:    setb %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: eq_i512_pair:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
+; AVX512BW-NEXT:    vmovdqu64 64(%rdi), %zmm1
+; AVX512BW-NEXT:    vpcmpeqb (%rsi), %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512BW-NEXT:    xorl %eax, %eax
+; AVX512BW-NEXT:    kortestq %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
   %a0 = load i512, i512* %a
   %b0 = load i512, i512* %b
   %xor1 = xor i512 %a0, %b0




More information about the llvm-commits mailing list