[llvm] r373845 - [X86] Enable AVX512BW for memcmp()
David Zarzycki via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 6 03:25:52 PDT 2019
Author: davezarzycki
Date: Sun Oct 6 03:25:52 2019
New Revision: 373845
URL: http://llvm.org/viewvc/llvm-project?rev=373845&view=rev
Log:
[X86] Enable AVX512BW for memcmp()
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/memcmp.ll
llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=373845&r1=373844&r2=373845&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Oct 6 03:25:52 2019
@@ -42354,10 +42354,12 @@ static SDValue combineVectorSizedSetCCEq
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
(OpSize == 256 && Subtarget.hasAVX2()) ||
(OpSize == 512 && Subtarget.useAVX512Regs())) {
- EVT VecVT = OpSize == 512 ? MVT::v16i32 :
+ auto BW = Subtarget.hasBWI();
+ EVT VecVT = OpSize == 512 ? (BW ? MVT::v64i8 : MVT::v16i32) :
OpSize == 256 ? MVT::v32i8 :
MVT::v16i8;
- EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT;
+ EVT CmpVT = OpSize == 512 ? (BW ? MVT::v64i1 : MVT::v16i1) : VecVT;
+
SDValue Cmp;
if (IsOrXorXorCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
@@ -42377,6 +42379,9 @@ static SDValue combineVectorSizedSetCCEq
Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
}
// For 512-bits we want to emit a setcc that will lower to kortest.
+ if (OpSize == 512 && BW)
+ return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i64, Cmp),
+ DAG.getConstant(0xFFFFFFFFFFFFFFFF, DL, MVT::i64), CC);
if (OpSize == 512)
return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp),
DAG.getConstant(0xFFFF, DL, MVT::i16), CC);
Modified: llvm/trunk/test/CodeGen/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp.ll?rev=373845&r1=373844&r2=373845&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcmp.ll Sun Oct 6 03:25:52 2019
@@ -6,7 +6,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F --check-prefix=X64-AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512BW
; This tests codegen time inlining/optimization of memcmp
; rdar://6480398
@@ -1551,6 +1551,15 @@ define i1 @length64_eq(i8* %x, i8* %y) n
; X64-AVX512F-NEXT: setae %al
; X64-AVX512F-NEXT: vzeroupper
; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length64_eq:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k0
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setae %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
%cmp = icmp ne i32 %call, 0
ret i1 %cmp
@@ -1612,6 +1621,15 @@ define i1 @length64_eq_const(i8* %X) nou
; X64-AVX512F-NEXT: setb %al
; X64-AVX512F-NEXT: vzeroupper
; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length64_eq_const:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setb %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
%c = icmp eq i32 %m, 0
ret i1 %c
Modified: llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll?rev=373845&r1=373844&r2=373845&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll (original)
+++ llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll Sun Oct 6 03:25:52 2019
@@ -319,14 +319,23 @@ define i32 @ne_i512(<8 x i64> %x, <8 x i
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: ne_i512:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setae %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: ne_i512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: kortestw %k0, %k0
+; AVX512F-NEXT: setae %al
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: ne_i512:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: kortestq %k0, %k0
+; AVX512BW-NEXT: setae %al
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%bcx = bitcast <8 x i64> %x to i512
%bcy = bitcast <8 x i64> %y to i512
%cmp = icmp ne i512 %bcx, %bcy
@@ -464,14 +473,23 @@ define i32 @eq_i512(<8 x i64> %x, <8 x i
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: eq_i512:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: eq_i512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: kortestw %k0, %k0
+; AVX512F-NEXT: setb %al
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: eq_i512:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: kortestq %k0, %k0
+; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%bcx = bitcast <8 x i64> %x to i512
%bcy = bitcast <8 x i64> %y to i512
%cmp = icmp eq i512 %bcx, %bcy
@@ -804,17 +822,29 @@ define i32 @ne_i512_pair(i512* %a, i512*
; NO512-NEXT: setne %al
; NO512-NEXT: retq
;
-; AVX512-LABEL: ne_i512_pair:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
-; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1
-; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
-; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setae %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: ne_i512_pair:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
+; AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
+; AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: kortestw %k0, %k0
+; AVX512F-NEXT: setae %al
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: ne_i512_pair:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
+; AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1
+; AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: kortestq %k0, %k0
+; AVX512BW-NEXT: setae %al
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%a0 = load i512, i512* %a
%b0 = load i512, i512* %b
%xor1 = xor i512 %a0, %b0
@@ -886,17 +916,29 @@ define i32 @eq_i512_pair(i512* %a, i512*
; NO512-NEXT: sete %al
; NO512-NEXT: retq
;
-; AVX512-LABEL: eq_i512_pair:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
-; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1
-; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
-; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: eq_i512_pair:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
+; AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
+; AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: kortestw %k0, %k0
+; AVX512F-NEXT: setb %al
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: eq_i512_pair:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
+; AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1
+; AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: kortestq %k0, %k0
+; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%a0 = load i512, i512* %a
%b0 = load i512, i512* %b
%xor1 = xor i512 %a0, %b0
More information about the llvm-commits mailing list