[PATCH] D68445: Enable AVX512 memcmp()
David Zarzycki via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 3 23:49:07 PDT 2019
davezarzycki created this revision.
davezarzycki added a reviewer: craig.topper.
davezarzycki added a project: LLVM.
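Unless I'm missing something, the "TODO: enable AVX512 when the DAG is ready" comment in lib/Target/X86/X86TargetTransformInfo.cpp is stale. This patch removes it and enables 64-byte loads for equality-only memcmp() expansion when the subtarget has AVX512 and the preferred vector width is at least 512 bits; test/CodeGen/X86/memcmp.ll gains avx512f and avx512bw RUN lines with the expected output for the 64-byte equality cases.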
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D68445
Files:
lib/Target/X86/X86TargetTransformInfo.cpp
test/CodeGen/X86/memcmp.ll
Index: test/CodeGen/X86/memcmp.ll
===================================================================
--- test/CodeGen/X86/memcmp.ll
+++ test/CodeGen/X86/memcmp.ll
@@ -5,6 +5,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F --check-prefix=X64-AVX512BW
; This tests codegen time inlining/optimization of memcmp
; rdar://6480398
@@ -1540,6 +1542,15 @@
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
+;
+; X64-AVX512F-LABEL: length64_eq:
+; X64-AVX512F: # %bb.0:
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
+; X64-AVX512F-NEXT: kortestw %k0, %k0
+; X64-AVX512F-NEXT: setae %al
+; X64-AVX512F-NEXT: vzeroupper
+; X64-AVX512F-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
%cmp = icmp ne i32 %call, 0
ret i1 %cmp
@@ -1592,6 +1603,15 @@
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
+;
+; X64-AVX512F-LABEL: length64_eq_const:
+; X64-AVX512F: # %bb.0:
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k0
+; X64-AVX512F-NEXT: kortestw %k0, %k0
+; X64-AVX512F-NEXT: setb %al
+; X64-AVX512F-NEXT: vzeroupper
+; X64-AVX512F-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
%c = icmp eq i32 %m, 0
ret i1 %c
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3394,9 +3394,8 @@
if (IsZeroCmp) {
// Only enable vector loads for equality comparison. Right now the vector
// version is not as fast for three way compare (see #33329).
- // TODO: enable AVX512 when the DAG is ready.
- // if (ST->hasAVX512()) Options.LoadSizes.push_back(64);
const unsigned PreferredWidth = ST->getPreferVectorWidth();
+ if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
if (PreferredWidth >= 256 && ST->hasAVX2()) Options.LoadSizes.push_back(32);
if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
// All GPR and vector loads can be unaligned. SIMD compare requires integer
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D68445.223145.patch
Type: text/x-patch
Size: 2975 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191004/32c51938/attachment.bin>
More information about the llvm-commits mailing list