[llvm] r298933 - [x86] use VPMOVMSKB to replace memcmp libcalls for 32-byte equality
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 28 10:23:50 PDT 2017
Author: spatel
Date: Tue Mar 28 12:23:49 2017
New Revision: 298933
URL: http://llvm.org/viewvc/llvm-project?rev=298933&view=rev
Log:
[x86] use VPMOVMSKB to replace memcmp libcalls for 32-byte equality
Follow-up to:
https://reviews.llvm.org/rL298775
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/memcmp.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=298933&r1=298932&r2=298933&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Mar 28 12:23:49 2017
@@ -6069,20 +6069,20 @@ bool SelectionDAGBuilder::visitMemCmpCal
// supports the MVT we'll be loading or if it is small enough (<= 4) that
// we'll only produce a small number of byte loads.
MVT LoadVT;
- switch (CSize->getZExtValue()) {
+ unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
+ switch (NumBitsToCompare) {
default:
return false;
- case 2:
+ case 16:
LoadVT = MVT::i16;
break;
- case 4:
+ case 32:
LoadVT = MVT::i32;
break;
- case 8:
- LoadVT = hasFastLoadsAndCompare(64);
- break;
- case 16:
- LoadVT = hasFastLoadsAndCompare(128);
+ case 64:
+ case 128:
+ case 256:
+ LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
break;
}
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=298933&r1=298932&r2=298933&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Mar 28 12:23:49 2017
@@ -4646,8 +4646,12 @@ MVT X86TargetLowering::hasFastEqualityCo
if (NumBits == 128 && isTypeLegal(MVT::v16i8))
return MVT::v16i8;
+ // VPMOVMSKB can handle this.
+ if (NumBits == 256 && isTypeLegal(MVT::v32i8))
+ return MVT::v32i8;
+
// TODO: Allow 64-bit type for 32-bit target.
- // TODO: 256- and 512-bit types should be allowed, but make sure that those
+ // TODO: 512-bit types should be allowed, but make sure that those
// cases are handled in combineVectorSizedSetCCEquality().
return MVT::INVALID_SIMPLE_VALUE_TYPE;
Modified: llvm/trunk/test/CodeGen/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp.ll?rev=298933&r1=298932&r2=298933&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcmp.ll Tue Mar 28 12:23:49 2017
@@ -249,15 +249,25 @@ define i1 @length32(i8* %x, i8* %y) noun
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length32:
-; X64: # BB#0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $32, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
+; SSE2-LABEL: length32:
+; SSE2: # BB#0:
+; SSE2-NEXT: pushq %rax
+; SSE2-NEXT: movl $32, %edx
+; SSE2-NEXT: callq memcmp
+; SSE2-NEXT: testl %eax, %eax
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: popq %rcx
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: length32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: cmpl $-1, %eax
+; AVX2-NEXT: sete %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
@@ -276,16 +286,26 @@ define i1 @length32_const(i8* %X, i32* n
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; X64-LABEL: length32_const:
-; X64: # BB#0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $32, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
+; SSE2-LABEL: length32_const:
+; SSE2: # BB#0:
+; SSE2-NEXT: pushq %rax
+; SSE2-NEXT: movl $.L.str, %esi
+; SSE2-NEXT: movl $32, %edx
+; SSE2-NEXT: callq memcmp
+; SSE2-NEXT: testl %eax, %eax
+; SSE2-NEXT: setne %al
+; SSE2-NEXT: popq %rcx
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: length32_const:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: cmpl $-1, %eax
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
%c = icmp ne i32 %m, 0
ret i1 %c
More information about the llvm-commits mailing list