[llvm] 657e424 - [X86] Fix 48/96 byte memcmp code gen
David Zarzycki via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 27 23:42:19 PDT 2019
Author: David Zarzycki
Date: 2019-10-28T08:41:45+02:00
New Revision: 657e4240b15ffb8a24c5a704a927a7848f3f40ee
URL: https://github.com/llvm/llvm-project/commit/657e4240b15ffb8a24c5a704a927a7848f3f40ee
DIFF: https://github.com/llvm/llvm-project/commit/657e4240b15ffb8a24c5a704a927a7848f3f40ee.diff
LOG: [X86] Fix 48/96 byte memcmp code gen
Detect scalar ISD::ZERO_EXTEND generated by memcmp lowering and convert
it to ISD::INSERT_SUBVECTOR.
https://reviews.llvm.org/D69464
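
For illustration only, here is a minimal sketch of the idea behind the ScalarToVector change
(the helper name, signature, and the hard-coded narrow type are simplified placeholders; the
committed diff below is authoritative): when the value being turned into a vector is itself a
zero-extend of a narrower scalar, bitcast the pre-extended value to a correspondingly narrow
vector type and let the existing insert-into-zero path widen it with ISD::INSERT_SUBVECTOR,
which selects to a plain vector load instead of a chain of scalar shifts and vpinsrb
instructions.

// Sketch, not the committed code: a stripped-down ScalarToVector that
// peels a ZERO_EXTEND before bitcasting. Assumes x86-64 (i64 vector index
// type) and a 128-bit tail widened to a 256-bit compare type.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue scalarToVectorSketch(SelectionDAG &DAG, const SDLoc &DL,
                                    SDValue X, EVT CastVT, EVT VecVT) {
  bool PeeledZext = false;
  EVT NarrowCastVT = CastVT;
  if (X.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue OrigX = X.getOperand(0);
    // e.g. the 48-byte memcmp tail: a 128-bit value zero-extended to 256 bits.
    if (OrigX.getScalarValueSizeInBits() == 128) {
      NarrowCastVT = MVT::v16i8; // bitcast the narrow value, not the zext
      X = OrigX;
      PeeledZext = true;
    }
  }
  X = DAG.getBitcast(NarrowCastVT, X);
  if (!PeeledZext)
    return X;
  // Widen by inserting into an all-zeros vector; this lowers to a simple
  // vector load rather than rebuilding the value piecewise in GPRs.
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT,
                     DAG.getConstant(0, DL, VecVT), X,
                     DAG.getConstant(0, DL, MVT::i64));
}

The effect shows up in the test diff below: the 48- and 96-byte equality checks collapse to a
pair of unaligned vector loads feeding vpxor/vptest (or vpcmpneq/kortest on AVX-512) instead of
dozens of shrq/vpinsrb instructions.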
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/memcmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 932839c619a5..b38329eb5271 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42637,6 +42637,7 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
CmpVT = PreferKOT ? MVT::v32i1 : VecVT;
}
EVT CastVT = VecVT;
+ bool NeedsAVX512FCast = false;
if (OpSize == 512 || NeedZExt) {
if (Subtarget.hasBWI()) {
VecVT = MVT::v64i8;
@@ -42648,12 +42649,30 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
CmpVT = MVT::v16i1;
CastVT = OpSize == 512 ? VecVT :
OpSize == 256 ? MVT::v8i32 : MVT::v4i32;
+ NeedsAVX512FCast = true;
}
}
auto ScalarToVector = [&](SDValue X) -> SDValue {
- X = DAG.getBitcast(CastVT, X);
- if (!NeedZExt)
+ bool TmpZext = false;
+ EVT TmpCastVT = CastVT;
+ if (X.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue OrigX = X.getOperand(0);
+ unsigned OrigSize = OrigX.getScalarValueSizeInBits();
+ if (OrigSize < OpSize) {
+ if (OrigSize == 128) {
+ TmpCastVT = NeedsAVX512FCast ? MVT::v4i32 : MVT::v16i8;
+ X = OrigX;
+ TmpZext = true;
+ } else if (OrigSize == 256) {
+ TmpCastVT = NeedsAVX512FCast ? MVT::v8i32 : MVT::v32i8;
+ X = OrigX;
+ TmpZext = true;
+ }
+ }
+ }
+ X = DAG.getBitcast(TmpCastVT, X);
+ if (!NeedZExt && !TmpZext)
return X;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT VecIdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 161e03734977..04f3ade7536f 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -2383,95 +2383,9 @@ define i1 @length48_eq(i8* %x, i8* %y) nounwind {
;
; X64-AVX2-LABEL: length48_eq:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: movq 32(%rdi), %rcx
-; X64-AVX2-NEXT: movq %rcx, %rax
-; X64-AVX2-NEXT: movl %ecx, %edx
-; X64-AVX2-NEXT: shrl $8, %edx
-; X64-AVX2-NEXT: vmovd %ecx, %xmm0
-; X64-AVX2-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; X64-AVX2-NEXT: movl %ecx, %edx
-; X64-AVX2-NEXT: shrl $16, %edx
-; X64-AVX2-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; X64-AVX2-NEXT: movl %ecx, %edx
-; X64-AVX2-NEXT: shrl $24, %edx
-; X64-AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
-; X64-AVX2-NEXT: movq %rcx, %rdx
-; X64-AVX2-NEXT: shrq $32, %rdx
-; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
-; X64-AVX2-NEXT: movq %rcx, %rdx
-; X64-AVX2-NEXT: shrq $40, %rcx
-; X64-AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm1
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: movq 40(%rdi), %rcx
-; X64-AVX2-NEXT: shrq $48, %rdx
-; X64-AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
-; X64-AVX2-NEXT: movq %rcx, %rdx
-; X64-AVX2-NEXT: shrq $56, %rdx
-; X64-AVX2-NEXT: shrq $56, %rax
-; X64-AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; X64-AVX2-NEXT: movl %ecx, %eax
-; X64-AVX2-NEXT: shrl $8, %eax
-; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; X64-AVX2-NEXT: movl %ecx, %eax
-; X64-AVX2-NEXT: shrl $16, %eax
-; X64-AVX2-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; X64-AVX2-NEXT: movl %ecx, %eax
-; X64-AVX2-NEXT: shrl $24, %eax
-; X64-AVX2-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; X64-AVX2-NEXT: movq %rcx, %rax
-; X64-AVX2-NEXT: shrq $32, %rax
-; X64-AVX2-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; X64-AVX2-NEXT: movq %rcx, %rax
-; X64-AVX2-NEXT: shrq $48, %rax
-; X64-AVX2-NEXT: shrq $40, %rcx
-; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
-; X64-AVX2-NEXT: movq 32(%rsi), %rcx
-; X64-AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; X64-AVX2-NEXT: movq %rcx, %rax
-; X64-AVX2-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
-; X64-AVX2-NEXT: movl %ecx, %edx
-; X64-AVX2-NEXT: shrl $8, %edx
-; X64-AVX2-NEXT: vmovd %ecx, %xmm2
-; X64-AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
-; X64-AVX2-NEXT: movl %ecx, %edx
-; X64-AVX2-NEXT: shrl $16, %edx
-; X64-AVX2-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
-; X64-AVX2-NEXT: movl %ecx, %edx
-; X64-AVX2-NEXT: shrl $24, %edx
-; X64-AVX2-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
-; X64-AVX2-NEXT: movq %rcx, %rdx
-; X64-AVX2-NEXT: shrq $32, %rdx
-; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
-; X64-AVX2-NEXT: movq %rcx, %rdx
-; X64-AVX2-NEXT: shrq $40, %rcx
-; X64-AVX2-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2
-; X64-AVX2-NEXT: movq 40(%rsi), %rcx
-; X64-AVX2-NEXT: shrq $48, %rdx
-; X64-AVX2-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
-; X64-AVX2-NEXT: movq %rcx, %rdx
-; X64-AVX2-NEXT: shrq $56, %rax
-; X64-AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; X64-AVX2-NEXT: movl %ecx, %eax
-; X64-AVX2-NEXT: shrl $8, %eax
-; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
-; X64-AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; X64-AVX2-NEXT: movl %ecx, %eax
-; X64-AVX2-NEXT: shrl $16, %eax
-; X64-AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; X64-AVX2-NEXT: movl %ecx, %eax
-; X64-AVX2-NEXT: shrl $24, %eax
-; X64-AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; X64-AVX2-NEXT: movq %rcx, %rax
-; X64-AVX2-NEXT: shrq $32, %rax
-; X64-AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; X64-AVX2-NEXT: movq %rcx, %rax
-; X64-AVX2-NEXT: shrq $40, %rcx
-; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
-; X64-AVX2-NEXT: shrq $48, %rax
-; X64-AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; X64-AVX2-NEXT: shrq $56, %rdx
-; X64-AVX2-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %xmm1
+; X64-AVX2-NEXT: vmovdqu 32(%rsi), %xmm2
; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -2482,95 +2396,9 @@ define i1 @length48_eq(i8* %x, i8* %y) nounwind {
;
; X64-AVX512-LABEL: length48_eq:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: movq 32(%rdi), %rcx
-; X64-AVX512-NEXT: movq %rcx, %rax
-; X64-AVX512-NEXT: movl %ecx, %edx
-; X64-AVX512-NEXT: shrl $8, %edx
-; X64-AVX512-NEXT: vmovd %ecx, %xmm0
-; X64-AVX512-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; X64-AVX512-NEXT: movl %ecx, %edx
-; X64-AVX512-NEXT: shrl $16, %edx
-; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; X64-AVX512-NEXT: movl %ecx, %edx
-; X64-AVX512-NEXT: shrl $24, %edx
-; X64-AVX512-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
-; X64-AVX512-NEXT: movq %rcx, %rdx
-; X64-AVX512-NEXT: shrq $32, %rdx
-; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
-; X64-AVX512-NEXT: movq %rcx, %rdx
-; X64-AVX512-NEXT: shrq $40, %rcx
-; X64-AVX512-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm1
; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: movq 40(%rdi), %rcx
-; X64-AVX512-NEXT: shrq $48, %rdx
-; X64-AVX512-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
-; X64-AVX512-NEXT: movq %rcx, %rdx
-; X64-AVX512-NEXT: shrq $56, %rdx
-; X64-AVX512-NEXT: shrq $56, %rax
-; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; X64-AVX512-NEXT: movl %ecx, %eax
-; X64-AVX512-NEXT: shrl $8, %eax
-; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; X64-AVX512-NEXT: movl %ecx, %eax
-; X64-AVX512-NEXT: shrl $16, %eax
-; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; X64-AVX512-NEXT: movl %ecx, %eax
-; X64-AVX512-NEXT: shrl $24, %eax
-; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; X64-AVX512-NEXT: movq %rcx, %rax
-; X64-AVX512-NEXT: shrq $32, %rax
-; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; X64-AVX512-NEXT: movq %rcx, %rax
-; X64-AVX512-NEXT: shrq $48, %rax
-; X64-AVX512-NEXT: shrq $40, %rcx
-; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
-; X64-AVX512-NEXT: movq 32(%rsi), %rcx
-; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; X64-AVX512-NEXT: movq %rcx, %rax
-; X64-AVX512-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
-; X64-AVX512-NEXT: movl %ecx, %edx
-; X64-AVX512-NEXT: shrl $8, %edx
-; X64-AVX512-NEXT: vmovd %ecx, %xmm2
-; X64-AVX512-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
-; X64-AVX512-NEXT: movl %ecx, %edx
-; X64-AVX512-NEXT: shrl $16, %edx
-; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
-; X64-AVX512-NEXT: movl %ecx, %edx
-; X64-AVX512-NEXT: shrl $24, %edx
-; X64-AVX512-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
-; X64-AVX512-NEXT: movq %rcx, %rdx
-; X64-AVX512-NEXT: shrq $32, %rdx
-; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
-; X64-AVX512-NEXT: movq %rcx, %rdx
-; X64-AVX512-NEXT: shrq $40, %rcx
-; X64-AVX512-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2
-; X64-AVX512-NEXT: movq 40(%rsi), %rcx
-; X64-AVX512-NEXT: shrq $48, %rdx
-; X64-AVX512-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
-; X64-AVX512-NEXT: movq %rcx, %rdx
-; X64-AVX512-NEXT: shrq $56, %rax
-; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; X64-AVX512-NEXT: movl %ecx, %eax
-; X64-AVX512-NEXT: shrl $8, %eax
-; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
-; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; X64-AVX512-NEXT: movl %ecx, %eax
-; X64-AVX512-NEXT: shrl $16, %eax
-; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; X64-AVX512-NEXT: movl %ecx, %eax
-; X64-AVX512-NEXT: shrl $24, %eax
-; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; X64-AVX512-NEXT: movq %rcx, %rax
-; X64-AVX512-NEXT: shrq $32, %rax
-; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; X64-AVX512-NEXT: movq %rcx, %rax
-; X64-AVX512-NEXT: shrq $40, %rcx
-; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
-; X64-AVX512-NEXT: shrq $48, %rax
-; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; X64-AVX512-NEXT: shrq $56, %rdx
-; X64-AVX512-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1
+; X64-AVX512-NEXT: vmovdqu 32(%rsi), %xmm2
; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1
; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -2583,22 +2411,8 @@ define i1 @length48_eq(i8* %x, i8* %y) nounwind {
; X64-MIC-AVX: # %bb.0:
; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1
-; X64-MIC-AVX-NEXT: movq 40(%rdi), %rax
-; X64-MIC-AVX-NEXT: movq 32(%rdi), %rcx
-; X64-MIC-AVX-NEXT: vmovd %ecx, %xmm2
-; X64-MIC-AVX-NEXT: shrq $32, %rcx
-; X64-MIC-AVX-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
-; X64-MIC-AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; X64-MIC-AVX-NEXT: shrq $32, %rax
-; X64-MIC-AVX-NEXT: movq 40(%rsi), %rcx
-; X64-MIC-AVX-NEXT: movq 32(%rsi), %rdx
-; X64-MIC-AVX-NEXT: vmovd %edx, %xmm3
-; X64-MIC-AVX-NEXT: shrq $32, %rdx
-; X64-MIC-AVX-NEXT: vpinsrd $1, %edx, %xmm3, %xmm3
-; X64-MIC-AVX-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
-; X64-MIC-AVX-NEXT: shrq $32, %rcx
-; X64-MIC-AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; X64-MIC-AVX-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm3
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm3
; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0
; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
; X64-MIC-AVX-NEXT: kortestw %k0, %k1
@@ -2728,150 +2542,37 @@ define i1 @length48_eq_const(i8* %X) nounwind {
;
; X64-AVX2-LABEL: length48_eq_const:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: pushq %rbp
-; X64-AVX2-NEXT: pushq %r15
-; X64-AVX2-NEXT: pushq %r14
-; X64-AVX2-NEXT: pushq %r12
-; X64-AVX2-NEXT: pushq %rbx
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: movq 40(%rdi), %rcx
-; X64-AVX2-NEXT: movq %rcx, %r8
-; X64-AVX2-NEXT: shrq $56, %r8
-; X64-AVX2-NEXT: movq %rcx, %r9
-; X64-AVX2-NEXT: shrq $48, %r9
-; X64-AVX2-NEXT: movq %rcx, %r10
-; X64-AVX2-NEXT: shrq $32, %r10
-; X64-AVX2-NEXT: movl %ecx, %r11d
-; X64-AVX2-NEXT: shrl $24, %r11d
-; X64-AVX2-NEXT: movl %ecx, %r14d
-; X64-AVX2-NEXT: shrl $16, %r14d
-; X64-AVX2-NEXT: movl %ecx, %r15d
-; X64-AVX2-NEXT: shrl $8, %r15d
-; X64-AVX2-NEXT: movq 32(%rdi), %rdi
-; X64-AVX2-NEXT: movq %rdi, %r12
-; X64-AVX2-NEXT: shrq $56, %r12
-; X64-AVX2-NEXT: movq %rdi, %rbx
-; X64-AVX2-NEXT: shrq $48, %rbx
-; X64-AVX2-NEXT: movq %rdi, %rdx
-; X64-AVX2-NEXT: shrq $32, %rdx
-; X64-AVX2-NEXT: movl %edi, %ebp
-; X64-AVX2-NEXT: shrl $24, %ebp
-; X64-AVX2-NEXT: movl %edi, %esi
-; X64-AVX2-NEXT: shrl $16, %esi
-; X64-AVX2-NEXT: vmovd %edi, %xmm1
-; X64-AVX2-NEXT: movl %edi, %eax
-; X64-AVX2-NEXT: shrl $8, %eax
-; X64-AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $2, %esi, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
-; X64-AVX2-NEXT: shrq $40, %rdi
-; X64-AVX2-NEXT: vpinsrb $5, %edi, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $6, %ebx, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $7, %r12d, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $9, %r15d, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $11, %r11d, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $12, %r10d, %xmm1, %xmm1
-; X64-AVX2-NEXT: shrq $40, %rcx
-; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $14, %r9d, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpinsrb $15, %r8d, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %xmm1
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vptest %ymm0, %ymm0
; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: popq %rbx
-; X64-AVX2-NEXT: popq %r12
-; X64-AVX2-NEXT: popq %r14
-; X64-AVX2-NEXT: popq %r15
-; X64-AVX2-NEXT: popq %rbp
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: length48_eq_const:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: pushq %rbp
-; X64-AVX512-NEXT: pushq %r15
-; X64-AVX512-NEXT: pushq %r14
-; X64-AVX512-NEXT: pushq %r12
-; X64-AVX512-NEXT: pushq %rbx
; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: movq 40(%rdi), %rcx
-; X64-AVX512-NEXT: movq %rcx, %r8
-; X64-AVX512-NEXT: shrq $56, %r8
-; X64-AVX512-NEXT: movq %rcx, %r9
-; X64-AVX512-NEXT: shrq $48, %r9
-; X64-AVX512-NEXT: movq %rcx, %r10
-; X64-AVX512-NEXT: shrq $32, %r10
-; X64-AVX512-NEXT: movl %ecx, %r11d
-; X64-AVX512-NEXT: shrl $24, %r11d
-; X64-AVX512-NEXT: movl %ecx, %r14d
-; X64-AVX512-NEXT: shrl $16, %r14d
-; X64-AVX512-NEXT: movl %ecx, %r15d
-; X64-AVX512-NEXT: shrl $8, %r15d
-; X64-AVX512-NEXT: movq 32(%rdi), %rdi
-; X64-AVX512-NEXT: movq %rdi, %r12
-; X64-AVX512-NEXT: shrq $56, %r12
-; X64-AVX512-NEXT: movq %rdi, %rbx
-; X64-AVX512-NEXT: shrq $48, %rbx
-; X64-AVX512-NEXT: movq %rdi, %rdx
-; X64-AVX512-NEXT: shrq $32, %rdx
-; X64-AVX512-NEXT: movl %edi, %ebp
-; X64-AVX512-NEXT: shrl $24, %ebp
-; X64-AVX512-NEXT: movl %edi, %esi
-; X64-AVX512-NEXT: shrl $16, %esi
-; X64-AVX512-NEXT: vmovd %edi, %xmm1
-; X64-AVX512-NEXT: movl %edi, %eax
-; X64-AVX512-NEXT: shrl $8, %eax
-; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $2, %esi, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
-; X64-AVX512-NEXT: shrq $40, %rdi
-; X64-AVX512-NEXT: vpinsrb $5, %edi, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $6, %ebx, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $7, %r12d, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $9, %r15d, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $11, %r11d, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $12, %r10d, %xmm1, %xmm1
-; X64-AVX512-NEXT: shrq $40, %rcx
-; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $14, %r9d, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpinsrb $15, %r8d, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vptest %ymm0, %ymm0
; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: popq %rbx
-; X64-AVX512-NEXT: popq %r12
-; X64-AVX512-NEXT: popq %r14
-; X64-AVX512-NEXT: popq %r15
-; X64-AVX512-NEXT: popq %rbp
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
;
; X64-MIC-AVX-LABEL: length48_eq_const:
; X64-MIC-AVX: # %bb.0:
; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0
-; X64-MIC-AVX-NEXT: movq 40(%rdi), %rax
-; X64-MIC-AVX-NEXT: movq 32(%rdi), %rcx
-; X64-MIC-AVX-NEXT: vmovd %ecx, %xmm1
-; X64-MIC-AVX-NEXT: shrq $32, %rcx
-; X64-MIC-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; X64-MIC-AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; X64-MIC-AVX-NEXT: shrq $32, %rax
-; X64-MIC-AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k0
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [892613426,959985462,858927408,926299444,0,0,0,0]
-; X64-MIC-AVX-NEXT: vpcmpneqd %zmm0, %zmm1, %k1
-; X64-MIC-AVX-NEXT: kortestw %k1, %k0
+; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm1
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,0,0,0,0]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
+; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960]
+; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX-NEXT: kortestw %k0, %k1
; X64-MIC-AVX-NEXT: setne %al
; X64-MIC-AVX-NEXT: vzeroupper
; X64-MIC-AVX-NEXT: retq
@@ -3420,187 +3121,11 @@ define i1 @length96_eq(i8* %x, i8* %y) nounwind {
;
; X64-AVX512BW-LABEL: length96_eq:
; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: movq 80(%rdi), %rcx
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $8, %edx
-; X64-AVX512BW-NEXT: vmovd %ecx, %xmm0
-; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $16, %edx
-; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $24, %edx
-; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $32, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $40, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq 88(%rdi), %rcx
-; X64-AVX512BW-NEXT: shrq $48, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $56, %rax
-; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $8, %eax
-; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $16, %eax
-; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $24, %eax
-; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: shrq $32, %rax
-; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: shrq $40, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq 64(%rdi), %rcx
-; X64-AVX512BW-NEXT: shrq $48, %rax
-; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: shrq $56, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $8, %edx
-; X64-AVX512BW-NEXT: vmovd %ecx, %xmm1
-; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $16, %edx
-; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $24, %edx
-; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $32, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $40, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm2
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm1
-; X64-AVX512BW-NEXT: movq 72(%rdi), %rcx
-; X64-AVX512BW-NEXT: shrq $48, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $56, %rdx
-; X64-AVX512BW-NEXT: shrq $56, %rax
-; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $8, %eax
-; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
-; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $16, %eax
-; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $24, %eax
-; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: shrq $32, %rax
-; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: shrq $48, %rax
-; X64-AVX512BW-NEXT: shrq $40, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
-; X64-AVX512BW-NEXT: movq 80(%rsi), %rcx
-; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $8, %edx
-; X64-AVX512BW-NEXT: vmovd %ecx, %xmm3
-; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $16, %edx
-; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $24, %edx
-; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $32, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $40, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movq 88(%rsi), %rcx
-; X64-AVX512BW-NEXT: shrq $48, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $56, %rax
-; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $8, %eax
-; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $16, %eax
-; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $24, %eax
-; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: shrq $32, %rax
-; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: shrq $40, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movq 64(%rsi), %rcx
-; X64-AVX512BW-NEXT: shrq $48, %rax
-; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: shrq $56, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm3, %xmm3
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $8, %edx
-; X64-AVX512BW-NEXT: vmovd %ecx, %xmm4
-; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm4, %xmm4
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $16, %edx
-; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm4, %xmm4
-; X64-AVX512BW-NEXT: movl %ecx, %edx
-; X64-AVX512BW-NEXT: shrl $24, %edx
-; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm4, %xmm4
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $32, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm4, %xmm4
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $40, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm4, %xmm4
-; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm1, %k0
-; X64-AVX512BW-NEXT: movq 72(%rsi), %rcx
-; X64-AVX512BW-NEXT: shrq $48, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm4, %xmm1
-; X64-AVX512BW-NEXT: movq %rcx, %rdx
-; X64-AVX512BW-NEXT: shrq $56, %rax
-; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $8, %eax
-; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $16, %eax
-; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movl %ecx, %eax
-; X64-AVX512BW-NEXT: shrl $24, %eax
-; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: shrq $32, %rax
-; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movq %rcx, %rax
-; X64-AVX512BW-NEXT: shrq $40, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: shrq $48, %rax
-; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: shrq $56, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
-; X64-AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
-; X64-AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k1
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vmovdqu 64(%rdi), %ymm1
+; X64-AVX512BW-NEXT: vmovdqu 64(%rsi), %ymm2
+; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k0
+; X64-AVX512BW-NEXT: vpcmpneqb %zmm2, %zmm1, %k1
; X64-AVX512BW-NEXT: kortestq %k1, %k0
; X64-AVX512BW-NEXT: setne %al
; X64-AVX512BW-NEXT: vzeroupper
@@ -3608,43 +3133,11 @@ define i1 @length96_eq(i8* %x, i8* %y) nounwind {
;
; X64-AVX512F-LABEL: length96_eq:
; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: movq 80(%rdi), %rax
-; X64-AVX512F-NEXT: vmovd %eax, %xmm0
-; X64-AVX512F-NEXT: shrq $32, %rax
-; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X64-AVX512F-NEXT: movq 88(%rdi), %rax
-; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
-; X64-AVX512F-NEXT: shrq $32, %rax
-; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X64-AVX512F-NEXT: movq 64(%rdi), %rax
-; X64-AVX512F-NEXT: vmovd %eax, %xmm1
-; X64-AVX512F-NEXT: shrq $32, %rax
-; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm2
-; X64-AVX512F-NEXT: movq 72(%rdi), %rax
-; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; X64-AVX512F-NEXT: shrq $32, %rax
-; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X64-AVX512F-NEXT: movq 80(%rsi), %rax
-; X64-AVX512F-NEXT: vmovd %eax, %xmm3
-; X64-AVX512F-NEXT: shrq $32, %rax
-; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; X64-AVX512F-NEXT: movq 88(%rsi), %rax
-; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; X64-AVX512F-NEXT: shrq $32, %rax
-; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; X64-AVX512F-NEXT: movq 64(%rsi), %rax
-; X64-AVX512F-NEXT: vmovd %eax, %xmm4
-; X64-AVX512F-NEXT: shrq $32, %rax
-; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm4, %xmm4
-; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm2, %k0
-; X64-AVX512F-NEXT: movq 72(%rsi), %rax
-; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm4, %xmm2
-; X64-AVX512F-NEXT: shrq $32, %rax
-; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; X64-AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1
-; X64-AVX512F-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
+; X64-AVX512F-NEXT: vmovdqu 64(%rsi), %ymm2
+; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
+; X64-AVX512F-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; X64-AVX512F-NEXT: kortestw %k1, %k0
; X64-AVX512F-NEXT: setne %al
; X64-AVX512F-NEXT: vzeroupper
@@ -3662,43 +3155,11 @@ define i1 @length96_eq(i8* %x, i8* %y) nounwind {
;
; X64-MIC-AVX512F-LABEL: length96_eq:
; X64-MIC-AVX512F: # %bb.0:
-; X64-MIC-AVX512F-NEXT: movq 80(%rdi), %rax
-; X64-MIC-AVX512F-NEXT: vmovd %eax, %xmm0
-; X64-MIC-AVX512F-NEXT: shrq $32, %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X64-MIC-AVX512F-NEXT: movq 88(%rdi), %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
-; X64-MIC-AVX512F-NEXT: shrq $32, %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X64-MIC-AVX512F-NEXT: movq 64(%rdi), %rax
-; X64-MIC-AVX512F-NEXT: vmovd %eax, %xmm1
-; X64-MIC-AVX512F-NEXT: shrq $32, %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm2
-; X64-MIC-AVX512F-NEXT: movq 72(%rdi), %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; X64-MIC-AVX512F-NEXT: shrq $32, %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X64-MIC-AVX512F-NEXT: movq 80(%rsi), %rax
-; X64-MIC-AVX512F-NEXT: vmovd %eax, %xmm3
-; X64-MIC-AVX512F-NEXT: shrq $32, %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; X64-MIC-AVX512F-NEXT: movq 88(%rsi), %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; X64-MIC-AVX512F-NEXT: shrq $32, %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; X64-MIC-AVX512F-NEXT: movq 64(%rsi), %rax
-; X64-MIC-AVX512F-NEXT: vmovd %eax, %xmm4
-; X64-MIC-AVX512F-NEXT: shrq $32, %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $1, %eax, %xmm4, %xmm4
-; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm2, %k0
-; X64-MIC-AVX512F-NEXT: movq 72(%rsi), %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $2, %eax, %xmm4, %xmm2
-; X64-MIC-AVX512F-NEXT: shrq $32, %rax
-; X64-MIC-AVX512F-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; X64-MIC-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; X64-MIC-AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
+; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rsi), %ymm2
+; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0
+; X64-MIC-AVX512F-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0
; X64-MIC-AVX512F-NEXT: setne %al
; X64-MIC-AVX512F-NEXT: vzeroupper
@@ -3810,99 +3271,11 @@ define i1 @length96_eq_const(i8* %X) nounwind {
;
; X64-AVX512BW-LABEL: length96_eq_const:
; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: movq 80(%rdi), %rax
-; X64-AVX512BW-NEXT: movq %rax, %rcx
-; X64-AVX512BW-NEXT: vmovd %eax, %xmm0
-; X64-AVX512BW-NEXT: movl %eax, %edx
-; X64-AVX512BW-NEXT: shrl $8, %edx
-; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %eax, %edx
-; X64-AVX512BW-NEXT: shrl $16, %edx
-; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %eax, %edx
-; X64-AVX512BW-NEXT: shrl $24, %edx
-; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rax, %rdx
-; X64-AVX512BW-NEXT: shrq $32, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rax, %rdx
-; X64-AVX512BW-NEXT: shrq $40, %rax
-; X64-AVX512BW-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq 88(%rdi), %rax
-; X64-AVX512BW-NEXT: shrq $48, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rax, %rdx
-; X64-AVX512BW-NEXT: shrq $56, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %eax, %ecx
-; X64-AVX512BW-NEXT: shrl $8, %ecx
-; X64-AVX512BW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %eax, %ecx
-; X64-AVX512BW-NEXT: shrl $16, %ecx
-; X64-AVX512BW-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %eax, %ecx
-; X64-AVX512BW-NEXT: shrl $24, %ecx
-; X64-AVX512BW-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rax, %rcx
-; X64-AVX512BW-NEXT: shrq $32, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rax, %rcx
-; X64-AVX512BW-NEXT: shrq $40, %rax
-; X64-AVX512BW-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq 64(%rdi), %rax
-; X64-AVX512BW-NEXT: shrq $48, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movq %rax, %rcx
-; X64-AVX512BW-NEXT: shrq $56, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: movl %eax, %edx
-; X64-AVX512BW-NEXT: shrl $8, %edx
-; X64-AVX512BW-NEXT: vmovd %eax, %xmm1
-; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movl %eax, %edx
-; X64-AVX512BW-NEXT: shrl $16, %edx
-; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movl %eax, %edx
-; X64-AVX512BW-NEXT: shrl $24, %edx
-; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movq %rax, %rdx
-; X64-AVX512BW-NEXT: shrq $32, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movq %rax, %rdx
-; X64-AVX512BW-NEXT: shrq $40, %rax
-; X64-AVX512BW-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm2
-; X64-AVX512BW-NEXT: movq 72(%rdi), %rax
-; X64-AVX512BW-NEXT: shrq $48, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movq %rax, %rdx
-; X64-AVX512BW-NEXT: shrq $56, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movl %eax, %ecx
-; X64-AVX512BW-NEXT: shrl $8, %ecx
-; X64-AVX512BW-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movl %eax, %ecx
-; X64-AVX512BW-NEXT: shrl $16, %ecx
-; X64-AVX512BW-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movl %eax, %ecx
-; X64-AVX512BW-NEXT: shrl $24, %ecx
-; X64-AVX512BW-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movq %rax, %rcx
-; X64-AVX512BW-NEXT: shrq $32, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: movq %rax, %rcx
-; X64-AVX512BW-NEXT: shrq $40, %rax
-; X64-AVX512BW-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: shrq $48, %rcx
-; X64-AVX512BW-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: shrq $56, %rdx
-; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vmovdqu 64(%rdi), %ymm1
; X64-AVX512BW-NEXT: vpcmpneqb {{.*}}(%rip), %zmm0, %k0
-; X64-AVX512BW-NEXT: vpcmpneqb {{.*}}(%rip), %zmm2, %k1
-; X64-AVX512BW-NEXT: kortestq %k0, %k1
+; X64-AVX512BW-NEXT: vpcmpneqb {{.*}}(%rip), %zmm1, %k1
+; X64-AVX512BW-NEXT: kortestq %k1, %k0
; X64-AVX512BW-NEXT: sete %al
; X64-AVX512BW-NEXT: vzeroupper
; X64-AVX512BW-NEXT: retq
@@ -3910,26 +3283,10 @@ define i1 @length96_eq_const(i8* %X) nounwind {
; X64-AVX512F-LABEL: length96_eq_const:
; X64-AVX512F: # %bb.0:
; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: movq 72(%rdi), %rax
-; X64-AVX512F-NEXT: movq 64(%rdi), %rcx
-; X64-AVX512F-NEXT: vmovd %ecx, %xmm1
-; X64-AVX512F-NEXT: shrq $32, %rcx
-; X64-AVX512F-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; X64-AVX512F-NEXT: shrq $32, %rax
-; X64-AVX512F-NEXT: movq 88(%rdi), %rcx
-; X64-AVX512F-NEXT: movq 80(%rdi), %rdx
-; X64-AVX512F-NEXT: vmovd %edx, %xmm2
-; X64-AVX512F-NEXT: shrq $32, %rdx
-; X64-AVX512F-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2
-; X64-AVX512F-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
-; X64-AVX512F-NEXT: shrq $32, %rcx
-; X64-AVX512F-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm2
-; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X64-AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; X64-AVX512F-NEXT: vpcmpneqd {{.*}}(%rip), %zmm1, %k0
-; X64-AVX512F-NEXT: vpcmpneqd {{.*}}(%rip), %zmm0, %k1
-; X64-AVX512F-NEXT: kortestw %k0, %k1
+; X64-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
+; X64-AVX512F-NEXT: vpcmpneqd {{.*}}(%rip), %zmm0, %k0
+; X64-AVX512F-NEXT: vpcmpneqd {{.*}}(%rip), %zmm1, %k1
+; X64-AVX512F-NEXT: kortestw %k1, %k0
; X64-AVX512F-NEXT: sete %al
; X64-AVX512F-NEXT: vzeroupper
; X64-AVX512F-NEXT: retq
@@ -3948,26 +3305,10 @@ define i1 @length96_eq_const(i8* %X) nounwind {
; X64-MIC-AVX512F-LABEL: length96_eq_const:
; X64-MIC-AVX512F: # %bb.0:
; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-MIC-AVX512F-NEXT: movq 72(%rdi), %rax
-; X64-MIC-AVX512F-NEXT: movq 64(%rdi), %rcx
-; X64-MIC-AVX512F-NEXT: vmovd %ecx, %xmm1
-; X64-MIC-AVX512F-NEXT: shrq $32, %rcx
-; X64-MIC-AVX512F-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; X64-MIC-AVX512F-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; X64-MIC-AVX512F-NEXT: shrq $32, %rax
-; X64-MIC-AVX512F-NEXT: movq 88(%rdi), %rcx
-; X64-MIC-AVX512F-NEXT: movq 80(%rdi), %rdx
-; X64-MIC-AVX512F-NEXT: vmovd %edx, %xmm2
-; X64-MIC-AVX512F-NEXT: shrq $32, %rdx
-; X64-MIC-AVX512F-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2
-; X64-MIC-AVX512F-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
-; X64-MIC-AVX512F-NEXT: shrq $32, %rcx
-; X64-MIC-AVX512F-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm2
-; X64-MIC-AVX512F-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X64-MIC-AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; X64-MIC-AVX512F-NEXT: vpcmpneqd {{.*}}(%rip), %zmm1, %k0
-; X64-MIC-AVX512F-NEXT: vpcmpneqd {{.*}}(%rip), %zmm0, %k1
-; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1
+; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1
+; X64-MIC-AVX512F-NEXT: vpcmpneqd {{.*}}(%rip), %zmm0, %k0
+; X64-MIC-AVX512F-NEXT: vpcmpneqd {{.*}}(%rip), %zmm1, %k1
+; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0
; X64-MIC-AVX512F-NEXT: sete %al
; X64-MIC-AVX512F-NEXT: vzeroupper
; X64-MIC-AVX512F-NEXT: retq