[llvm] r342860 - [X86] Add 512-bit test cases to setcc-wide-types.ll. NFC
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 23 22:46:01 PDT 2018
Author: ctopper
Date: Sun Sep 23 22:46:01 2018
New Revision: 342860
URL: http://llvm.org/viewvc/llvm-project?rev=342860&view=rev
Log:
[X86] Add 512-bit test cases to setcc-wide-types.ll. NFC
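
The new cases mirror the existing i128/i256 tests: bitcast the two <8 x i64>
arguments to i512, compare, and zero-extend the i1 result to i32. Distilled
from the diff below (CHECK lines omitted), the ne_i512 pattern is:

define i32 @ne_i512(<8 x i64> %x, <8 x i64> %y) {
  %bcx = bitcast <8 x i64> %x to i512
  %bcy = bitcast <8 x i64> %y to i512
  %cmp = icmp ne i512 %bcx, %bcy
  %zext = zext i1 %cmp to i32
  ret i32 %zext
}

The eq_i512 variant differs only in using 'icmp eq' (and so 'sete' rather
than 'setne' in the expected assembly). The *_pair variants model memcmp
expansion; see the comments in the diff.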
Modified:
llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll
Modified: llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll?rev=342860&r1=342859&r2=342860&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll (original)
+++ llvm/trunk/test/CodeGen/X86/setcc-wide-types.ll Sun Sep 23 22:46:01 2018
@@ -189,6 +189,454 @@ define i32 @eq_i256(<4 x i64> %x, <4 x i
ret i32 %zext
}
+define i32 @ne_i512(<8 x i64> %x, <8 x i64> %y) {
+; SSE2-LABEL: ne_i512:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,0,1]
+; SSE2-NEXT: movq %xmm8, %rax
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,0,1]
+; SSE2-NEXT: movq %xmm8, %rcx
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm1[2,3,0,1]
+; SSE2-NEXT: movq %xmm8, %rdx
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,0,1]
+; SSE2-NEXT: movq %xmm8, %rsi
+; SSE2-NEXT: movq %xmm0, %r11
+; SSE2-NEXT: movq %xmm2, %r8
+; SSE2-NEXT: movq %xmm1, %r9
+; SSE2-NEXT: movq %xmm3, %r10
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rdi
+; SSE2-NEXT: xorq %rax, %rdi
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rax
+; SSE2-NEXT: xorq %rcx, %rax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rcx
+; SSE2-NEXT: xorq %rdx, %rcx
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rdx
+; SSE2-NEXT: xorq %rsi, %rdx
+; SSE2-NEXT: orq %rcx, %rdx
+; SSE2-NEXT: orq %rax, %rdx
+; SSE2-NEXT: orq %rdi, %rdx
+; SSE2-NEXT: movq %xmm4, %rax
+; SSE2-NEXT: xorq %r11, %rax
+; SSE2-NEXT: movq %xmm6, %rcx
+; SSE2-NEXT: xorq %r8, %rcx
+; SSE2-NEXT: movq %xmm5, %rsi
+; SSE2-NEXT: xorq %r9, %rsi
+; SSE2-NEXT: movq %xmm7, %rdi
+; SSE2-NEXT: xorq %r10, %rdi
+; SSE2-NEXT: orq %rsi, %rdi
+; SSE2-NEXT: orq %rcx, %rdi
+; SSE2-NEXT: orq %rax, %rdi
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: orq %rdx, %rdi
+; SSE2-NEXT: setne %al
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: ne_i512:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vmovq %xmm1, %rcx
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vmovq %xmm4, %rdx
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vmovq %xmm5, %rsi
+; AVX1-NEXT: vpextrq $1, %xmm0, %r11
+; AVX1-NEXT: vpextrq $1, %xmm1, %r8
+; AVX1-NEXT: vpextrq $1, %xmm4, %r9
+; AVX1-NEXT: vpextrq $1, %xmm5, %r10
+; AVX1-NEXT: vmovq %xmm2, %rdi
+; AVX1-NEXT: xorq %rax, %rdi
+; AVX1-NEXT: vmovq %xmm3, %rax
+; AVX1-NEXT: xorq %rcx, %rax
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm0
+; AVX1-NEXT: vmovq %xmm0, %rcx
+; AVX1-NEXT: xorq %rdx, %rcx
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
+; AVX1-NEXT: vmovq %xmm1, %rdx
+; AVX1-NEXT: xorq %rsi, %rdx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: orq %rax, %rdx
+; AVX1-NEXT: orq %rdi, %rdx
+; AVX1-NEXT: vpextrq $1, %xmm2, %rax
+; AVX1-NEXT: xorq %r11, %rax
+; AVX1-NEXT: vpextrq $1, %xmm3, %rcx
+; AVX1-NEXT: xorq %r8, %rcx
+; AVX1-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX1-NEXT: xorq %r9, %rsi
+; AVX1-NEXT: vpextrq $1, %xmm1, %rdi
+; AVX1-NEXT: xorq %r10, %rdi
+; AVX1-NEXT: orq %rsi, %rdi
+; AVX1-NEXT: orq %rcx, %rdi
+; AVX1-NEXT: orq %rax, %rdi
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: orq %rdx, %rdi
+; AVX1-NEXT: setne %al
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ne_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: vmovq %xmm1, %rcx
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
+; AVX2-NEXT: vmovq %xmm4, %rdx
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
+; AVX2-NEXT: vmovq %xmm5, %rsi
+; AVX2-NEXT: vpextrq $1, %xmm0, %r11
+; AVX2-NEXT: vpextrq $1, %xmm1, %r8
+; AVX2-NEXT: vpextrq $1, %xmm4, %r9
+; AVX2-NEXT: vpextrq $1, %xmm5, %r10
+; AVX2-NEXT: vmovq %xmm2, %rdi
+; AVX2-NEXT: xorq %rax, %rdi
+; AVX2-NEXT: vmovq %xmm3, %rax
+; AVX2-NEXT: xorq %rcx, %rax
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm0
+; AVX2-NEXT: vmovq %xmm0, %rcx
+; AVX2-NEXT: xorq %rdx, %rcx
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm1
+; AVX2-NEXT: vmovq %xmm1, %rdx
+; AVX2-NEXT: xorq %rsi, %rdx
+; AVX2-NEXT: orq %rcx, %rdx
+; AVX2-NEXT: orq %rax, %rdx
+; AVX2-NEXT: orq %rdi, %rdx
+; AVX2-NEXT: vpextrq $1, %xmm2, %rax
+; AVX2-NEXT: xorq %r11, %rax
+; AVX2-NEXT: vpextrq $1, %xmm3, %rcx
+; AVX2-NEXT: xorq %r8, %rcx
+; AVX2-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX2-NEXT: xorq %r9, %rsi
+; AVX2-NEXT: vpextrq $1, %xmm1, %rdi
+; AVX2-NEXT: xorq %r10, %rdi
+; AVX2-NEXT: orq %rsi, %rdi
+; AVX2-NEXT: orq %rcx, %rdi
+; AVX2-NEXT: orq %rax, %rdi
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: orq %rdx, %rdi
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: ne_i512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512F-NEXT: vmovq %xmm2, %rdx
+; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+; AVX512F-NEXT: vmovq %xmm3, %rsi
+; AVX512F-NEXT: vmovq %xmm0, %rdi
+; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; AVX512F-NEXT: vmovq %xmm4, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm2, %r11
+; AVX512F-NEXT: vpextrq $1, %xmm3, %r10
+; AVX512F-NEXT: vpextrq $1, %xmm0, %r9
+; AVX512F-NEXT: vpextrq $1, %xmm4, %r8
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX512F-NEXT: vmovq %xmm0, %rcx
+; AVX512F-NEXT: xorq %rdx, %rcx
+; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm2
+; AVX512F-NEXT: vmovq %xmm2, %rdx
+; AVX512F-NEXT: xorq %rsi, %rdx
+; AVX512F-NEXT: orq %rcx, %rdx
+; AVX512F-NEXT: vmovq %xmm1, %rcx
+; AVX512F-NEXT: xorq %rdi, %rcx
+; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
+; AVX512F-NEXT: vmovq %xmm3, %rsi
+; AVX512F-NEXT: xorq %rax, %rsi
+; AVX512F-NEXT: orq %rdx, %rsi
+; AVX512F-NEXT: orq %rcx, %rsi
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: xorq %r11, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm2, %rcx
+; AVX512F-NEXT: xorq %r10, %rcx
+; AVX512F-NEXT: orq %rax, %rcx
+; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
+; AVX512F-NEXT: xorq %r9, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm3, %rdx
+; AVX512F-NEXT: xorq %r8, %rdx
+; AVX512F-NEXT: orq %rcx, %rdx
+; AVX512F-NEXT: orq %rax, %rdx
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: orq %rsi, %rdx
+; AVX512F-NEXT: setne %al
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: ne_i512:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512BW-NEXT: vmovq %xmm2, %rdx
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+; AVX512BW-NEXT: vmovq %xmm3, %rsi
+; AVX512BW-NEXT: vmovq %xmm0, %rdi
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; AVX512BW-NEXT: vmovq %xmm4, %rax
+; AVX512BW-NEXT: vpextrq $1, %xmm2, %r11
+; AVX512BW-NEXT: vpextrq $1, %xmm3, %r10
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %r9
+; AVX512BW-NEXT: vpextrq $1, %xmm4, %r8
+; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rcx
+; AVX512BW-NEXT: xorq %rdx, %rcx
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
+; AVX512BW-NEXT: vmovq %xmm2, %rdx
+; AVX512BW-NEXT: xorq %rsi, %rdx
+; AVX512BW-NEXT: orq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %xmm1, %rcx
+; AVX512BW-NEXT: xorq %rdi, %rcx
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
+; AVX512BW-NEXT: vmovq %xmm3, %rsi
+; AVX512BW-NEXT: xorq %rax, %rsi
+; AVX512BW-NEXT: orq %rdx, %rsi
+; AVX512BW-NEXT: orq %rcx, %rsi
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT: xorq %r11, %rax
+; AVX512BW-NEXT: vpextrq $1, %xmm2, %rcx
+; AVX512BW-NEXT: xorq %r10, %rcx
+; AVX512BW-NEXT: orq %rax, %rcx
+; AVX512BW-NEXT: vpextrq $1, %xmm1, %rax
+; AVX512BW-NEXT: xorq %r9, %rax
+; AVX512BW-NEXT: vpextrq $1, %xmm3, %rdx
+; AVX512BW-NEXT: xorq %r8, %rdx
+; AVX512BW-NEXT: orq %rcx, %rdx
+; AVX512BW-NEXT: orq %rax, %rdx
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: orq %rsi, %rdx
+; AVX512BW-NEXT: setne %al
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+ %bcx = bitcast <8 x i64> %x to i512
+ %bcy = bitcast <8 x i64> %y to i512
+ %cmp = icmp ne i512 %bcx, %bcy
+ %zext = zext i1 %cmp to i32
+ ret i32 %zext
+}
+
+define i32 @eq_i512(<8 x i64> %x, <8 x i64> %y) {
+; SSE2-LABEL: eq_i512:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,0,1]
+; SSE2-NEXT: movq %xmm8, %rax
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,0,1]
+; SSE2-NEXT: movq %xmm8, %rcx
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm1[2,3,0,1]
+; SSE2-NEXT: movq %xmm8, %rdx
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,0,1]
+; SSE2-NEXT: movq %xmm8, %rsi
+; SSE2-NEXT: movq %xmm0, %r11
+; SSE2-NEXT: movq %xmm2, %r8
+; SSE2-NEXT: movq %xmm1, %r9
+; SSE2-NEXT: movq %xmm3, %r10
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rdi
+; SSE2-NEXT: xorq %rax, %rdi
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rax
+; SSE2-NEXT: xorq %rcx, %rax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rcx
+; SSE2-NEXT: xorq %rdx, %rcx
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rdx
+; SSE2-NEXT: xorq %rsi, %rdx
+; SSE2-NEXT: orq %rcx, %rdx
+; SSE2-NEXT: orq %rax, %rdx
+; SSE2-NEXT: orq %rdi, %rdx
+; SSE2-NEXT: movq %xmm4, %rax
+; SSE2-NEXT: xorq %r11, %rax
+; SSE2-NEXT: movq %xmm6, %rcx
+; SSE2-NEXT: xorq %r8, %rcx
+; SSE2-NEXT: movq %xmm5, %rsi
+; SSE2-NEXT: xorq %r9, %rsi
+; SSE2-NEXT: movq %xmm7, %rdi
+; SSE2-NEXT: xorq %r10, %rdi
+; SSE2-NEXT: orq %rsi, %rdi
+; SSE2-NEXT: orq %rcx, %rdi
+; SSE2-NEXT: orq %rax, %rdi
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: orq %rdx, %rdi
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: eq_i512:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vmovq %xmm1, %rcx
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vmovq %xmm4, %rdx
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vmovq %xmm5, %rsi
+; AVX1-NEXT: vpextrq $1, %xmm0, %r11
+; AVX1-NEXT: vpextrq $1, %xmm1, %r8
+; AVX1-NEXT: vpextrq $1, %xmm4, %r9
+; AVX1-NEXT: vpextrq $1, %xmm5, %r10
+; AVX1-NEXT: vmovq %xmm2, %rdi
+; AVX1-NEXT: xorq %rax, %rdi
+; AVX1-NEXT: vmovq %xmm3, %rax
+; AVX1-NEXT: xorq %rcx, %rax
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm0
+; AVX1-NEXT: vmovq %xmm0, %rcx
+; AVX1-NEXT: xorq %rdx, %rcx
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
+; AVX1-NEXT: vmovq %xmm1, %rdx
+; AVX1-NEXT: xorq %rsi, %rdx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: orq %rax, %rdx
+; AVX1-NEXT: orq %rdi, %rdx
+; AVX1-NEXT: vpextrq $1, %xmm2, %rax
+; AVX1-NEXT: xorq %r11, %rax
+; AVX1-NEXT: vpextrq $1, %xmm3, %rcx
+; AVX1-NEXT: xorq %r8, %rcx
+; AVX1-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX1-NEXT: xorq %r9, %rsi
+; AVX1-NEXT: vpextrq $1, %xmm1, %rdi
+; AVX1-NEXT: xorq %r10, %rdi
+; AVX1-NEXT: orq %rsi, %rdi
+; AVX1-NEXT: orq %rcx, %rdi
+; AVX1-NEXT: orq %rax, %rdi
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: orq %rdx, %rdi
+; AVX1-NEXT: sete %al
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: eq_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: vmovq %xmm1, %rcx
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
+; AVX2-NEXT: vmovq %xmm4, %rdx
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
+; AVX2-NEXT: vmovq %xmm5, %rsi
+; AVX2-NEXT: vpextrq $1, %xmm0, %r11
+; AVX2-NEXT: vpextrq $1, %xmm1, %r8
+; AVX2-NEXT: vpextrq $1, %xmm4, %r9
+; AVX2-NEXT: vpextrq $1, %xmm5, %r10
+; AVX2-NEXT: vmovq %xmm2, %rdi
+; AVX2-NEXT: xorq %rax, %rdi
+; AVX2-NEXT: vmovq %xmm3, %rax
+; AVX2-NEXT: xorq %rcx, %rax
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm0
+; AVX2-NEXT: vmovq %xmm0, %rcx
+; AVX2-NEXT: xorq %rdx, %rcx
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm1
+; AVX2-NEXT: vmovq %xmm1, %rdx
+; AVX2-NEXT: xorq %rsi, %rdx
+; AVX2-NEXT: orq %rcx, %rdx
+; AVX2-NEXT: orq %rax, %rdx
+; AVX2-NEXT: orq %rdi, %rdx
+; AVX2-NEXT: vpextrq $1, %xmm2, %rax
+; AVX2-NEXT: xorq %r11, %rax
+; AVX2-NEXT: vpextrq $1, %xmm3, %rcx
+; AVX2-NEXT: xorq %r8, %rcx
+; AVX2-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX2-NEXT: xorq %r9, %rsi
+; AVX2-NEXT: vpextrq $1, %xmm1, %rdi
+; AVX2-NEXT: xorq %r10, %rdi
+; AVX2-NEXT: orq %rsi, %rdi
+; AVX2-NEXT: orq %rcx, %rdi
+; AVX2-NEXT: orq %rax, %rdi
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: orq %rdx, %rdi
+; AVX2-NEXT: sete %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: eq_i512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512F-NEXT: vmovq %xmm2, %rdx
+; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+; AVX512F-NEXT: vmovq %xmm3, %rsi
+; AVX512F-NEXT: vmovq %xmm0, %rdi
+; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; AVX512F-NEXT: vmovq %xmm4, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm2, %r11
+; AVX512F-NEXT: vpextrq $1, %xmm3, %r10
+; AVX512F-NEXT: vpextrq $1, %xmm0, %r9
+; AVX512F-NEXT: vpextrq $1, %xmm4, %r8
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX512F-NEXT: vmovq %xmm0, %rcx
+; AVX512F-NEXT: xorq %rdx, %rcx
+; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm2
+; AVX512F-NEXT: vmovq %xmm2, %rdx
+; AVX512F-NEXT: xorq %rsi, %rdx
+; AVX512F-NEXT: orq %rcx, %rdx
+; AVX512F-NEXT: vmovq %xmm1, %rcx
+; AVX512F-NEXT: xorq %rdi, %rcx
+; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
+; AVX512F-NEXT: vmovq %xmm3, %rsi
+; AVX512F-NEXT: xorq %rax, %rsi
+; AVX512F-NEXT: orq %rdx, %rsi
+; AVX512F-NEXT: orq %rcx, %rsi
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: xorq %r11, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm2, %rcx
+; AVX512F-NEXT: xorq %r10, %rcx
+; AVX512F-NEXT: orq %rax, %rcx
+; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
+; AVX512F-NEXT: xorq %r9, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm3, %rdx
+; AVX512F-NEXT: xorq %r8, %rdx
+; AVX512F-NEXT: orq %rcx, %rdx
+; AVX512F-NEXT: orq %rax, %rdx
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: orq %rsi, %rdx
+; AVX512F-NEXT: sete %al
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: eq_i512:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512BW-NEXT: vmovq %xmm2, %rdx
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+; AVX512BW-NEXT: vmovq %xmm3, %rsi
+; AVX512BW-NEXT: vmovq %xmm0, %rdi
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; AVX512BW-NEXT: vmovq %xmm4, %rax
+; AVX512BW-NEXT: vpextrq $1, %xmm2, %r11
+; AVX512BW-NEXT: vpextrq $1, %xmm3, %r10
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %r9
+; AVX512BW-NEXT: vpextrq $1, %xmm4, %r8
+; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rcx
+; AVX512BW-NEXT: xorq %rdx, %rcx
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
+; AVX512BW-NEXT: vmovq %xmm2, %rdx
+; AVX512BW-NEXT: xorq %rsi, %rdx
+; AVX512BW-NEXT: orq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %xmm1, %rcx
+; AVX512BW-NEXT: xorq %rdi, %rcx
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
+; AVX512BW-NEXT: vmovq %xmm3, %rsi
+; AVX512BW-NEXT: xorq %rax, %rsi
+; AVX512BW-NEXT: orq %rdx, %rsi
+; AVX512BW-NEXT: orq %rcx, %rsi
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT: xorq %r11, %rax
+; AVX512BW-NEXT: vpextrq $1, %xmm2, %rcx
+; AVX512BW-NEXT: xorq %r10, %rcx
+; AVX512BW-NEXT: orq %rax, %rcx
+; AVX512BW-NEXT: vpextrq $1, %xmm1, %rax
+; AVX512BW-NEXT: xorq %r9, %rax
+; AVX512BW-NEXT: vpextrq $1, %xmm3, %rdx
+; AVX512BW-NEXT: xorq %r8, %rdx
+; AVX512BW-NEXT: orq %rcx, %rdx
+; AVX512BW-NEXT: orq %rax, %rdx
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: orq %rsi, %rdx
+; AVX512BW-NEXT: sete %al
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+ %bcx = bitcast <8 x i64> %x to i512
+ %bcy = bitcast <8 x i64> %y to i512
+ %cmp = icmp eq i512 %bcx, %bcy
+ %zext = zext i1 %cmp to i32
+ ret i32 %zext
+}
+
; This test models the expansion of 'memcmp(a, b, 32) != 0'
; if we allowed 2 pairs of 16-byte loads per block.
@@ -456,4 +904,144 @@ define i32 @eq_i256_pair(i256* %a, i256*
%z = zext i1 %cmp to i32
ret i32 %z
}
+
+; This test models the expansion of 'memcmp(a, b, 128) != 0'
+; if we allowed 2 pairs of 64-byte loads per block.
+
+define i32 @ne_i512_pair(i512* %a, i512* %b) {
+; ANY-LABEL: ne_i512_pair:
+; ANY: # %bb.0:
+; ANY-NEXT: movq 32(%rdi), %r8
+; ANY-NEXT: movq 48(%rdi), %r9
+; ANY-NEXT: movq 40(%rdi), %rdx
+; ANY-NEXT: movq 56(%rdi), %rcx
+; ANY-NEXT: xorq 56(%rsi), %rcx
+; ANY-NEXT: movq 120(%rdi), %rax
+; ANY-NEXT: xorq 120(%rsi), %rax
+; ANY-NEXT: orq %rcx, %rax
+; ANY-NEXT: movq 88(%rdi), %rcx
+; ANY-NEXT: xorq 88(%rsi), %rcx
+; ANY-NEXT: orq %rcx, %rax
+; ANY-NEXT: movq 24(%rdi), %rcx
+; ANY-NEXT: xorq 24(%rsi), %rcx
+; ANY-NEXT: xorq 40(%rsi), %rdx
+; ANY-NEXT: orq %rcx, %rax
+; ANY-NEXT: movq 104(%rdi), %rcx
+; ANY-NEXT: xorq 104(%rsi), %rcx
+; ANY-NEXT: orq %rdx, %rcx
+; ANY-NEXT: movq 72(%rdi), %rdx
+; ANY-NEXT: xorq 72(%rsi), %rdx
+; ANY-NEXT: orq %rdx, %rcx
+; ANY-NEXT: movq 16(%rdi), %r10
+; ANY-NEXT: orq %rax, %rcx
+; ANY-NEXT: movq 8(%rdi), %rax
+; ANY-NEXT: xorq 8(%rsi), %rax
+; ANY-NEXT: xorq 48(%rsi), %r9
+; ANY-NEXT: orq %rax, %rcx
+; ANY-NEXT: movq 112(%rdi), %rax
+; ANY-NEXT: xorq 112(%rsi), %rax
+; ANY-NEXT: orq %r9, %rax
+; ANY-NEXT: movq 80(%rdi), %rdx
+; ANY-NEXT: xorq 80(%rsi), %rdx
+; ANY-NEXT: orq %rdx, %rax
+; ANY-NEXT: movq (%rdi), %r9
+; ANY-NEXT: xorq 16(%rsi), %r10
+; ANY-NEXT: xorq (%rsi), %r9
+; ANY-NEXT: xorq 32(%rsi), %r8
+; ANY-NEXT: orq %r10, %rax
+; ANY-NEXT: movq 96(%rdi), %rdx
+; ANY-NEXT: movq 64(%rdi), %rdi
+; ANY-NEXT: xorq 64(%rsi), %rdi
+; ANY-NEXT: xorq 96(%rsi), %rdx
+; ANY-NEXT: orq %r8, %rdx
+; ANY-NEXT: orq %rdi, %rdx
+; ANY-NEXT: orq %rax, %rdx
+; ANY-NEXT: orq %r9, %rdx
+; ANY-NEXT: xorl %eax, %eax
+; ANY-NEXT: orq %rcx, %rdx
+; ANY-NEXT: setne %al
+; ANY-NEXT: retq
+ %a0 = load i512, i512* %a
+ %b0 = load i512, i512* %b
+ %xor1 = xor i512 %a0, %b0
+ %ap1 = getelementptr i512, i512* %a, i512 1
+ %bp1 = getelementptr i512, i512* %b, i512 1
+ %a1 = load i512, i512* %ap1
+ %b1 = load i512, i512* %bp1
+ %xor2 = xor i512 %a1, %b1
+ %or = or i512 %xor1, %xor2
+ %cmp = icmp ne i512 %or, 0
+ %z = zext i1 %cmp to i32
+ ret i32 %z
+}
+
+; This test models the expansion of 'memcmp(a, b, 128) != 0'
+; if we allowed 2 pairs of 64-byte loads per block.
+
+define i32 @eq_i512_pair(i512* %a, i512* %b) {
+; ANY-LABEL: eq_i512_pair:
+; ANY: # %bb.0:
+; ANY-NEXT: movq 32(%rdi), %r8
+; ANY-NEXT: movq 48(%rdi), %r9
+; ANY-NEXT: movq 40(%rdi), %rdx
+; ANY-NEXT: movq 56(%rdi), %rcx
+; ANY-NEXT: xorq 56(%rsi), %rcx
+; ANY-NEXT: movq 120(%rdi), %rax
+; ANY-NEXT: xorq 120(%rsi), %rax
+; ANY-NEXT: orq %rcx, %rax
+; ANY-NEXT: movq 88(%rdi), %rcx
+; ANY-NEXT: xorq 88(%rsi), %rcx
+; ANY-NEXT: orq %rcx, %rax
+; ANY-NEXT: movq 24(%rdi), %rcx
+; ANY-NEXT: xorq 24(%rsi), %rcx
+; ANY-NEXT: xorq 40(%rsi), %rdx
+; ANY-NEXT: orq %rcx, %rax
+; ANY-NEXT: movq 104(%rdi), %rcx
+; ANY-NEXT: xorq 104(%rsi), %rcx
+; ANY-NEXT: orq %rdx, %rcx
+; ANY-NEXT: movq 72(%rdi), %rdx
+; ANY-NEXT: xorq 72(%rsi), %rdx
+; ANY-NEXT: orq %rdx, %rcx
+; ANY-NEXT: movq 16(%rdi), %r10
+; ANY-NEXT: orq %rax, %rcx
+; ANY-NEXT: movq 8(%rdi), %rax
+; ANY-NEXT: xorq 8(%rsi), %rax
+; ANY-NEXT: xorq 48(%rsi), %r9
+; ANY-NEXT: orq %rax, %rcx
+; ANY-NEXT: movq 112(%rdi), %rax
+; ANY-NEXT: xorq 112(%rsi), %rax
+; ANY-NEXT: orq %r9, %rax
+; ANY-NEXT: movq 80(%rdi), %rdx
+; ANY-NEXT: xorq 80(%rsi), %rdx
+; ANY-NEXT: orq %rdx, %rax
+; ANY-NEXT: movq (%rdi), %r9
+; ANY-NEXT: xorq 16(%rsi), %r10
+; ANY-NEXT: xorq (%rsi), %r9
+; ANY-NEXT: xorq 32(%rsi), %r8
+; ANY-NEXT: orq %r10, %rax
+; ANY-NEXT: movq 96(%rdi), %rdx
+; ANY-NEXT: movq 64(%rdi), %rdi
+; ANY-NEXT: xorq 64(%rsi), %rdi
+; ANY-NEXT: xorq 96(%rsi), %rdx
+; ANY-NEXT: orq %r8, %rdx
+; ANY-NEXT: orq %rdi, %rdx
+; ANY-NEXT: orq %rax, %rdx
+; ANY-NEXT: orq %r9, %rdx
+; ANY-NEXT: xorl %eax, %eax
+; ANY-NEXT: orq %rcx, %rdx
+; ANY-NEXT: sete %al
+; ANY-NEXT: retq
+ %a0 = load i512, i512* %a
+ %b0 = load i512, i512* %b
+ %xor1 = xor i512 %a0, %b0
+ %ap1 = getelementptr i512, i512* %a, i512 1
+ %bp1 = getelementptr i512, i512* %b, i512 1
+ %a1 = load i512, i512* %ap1
+ %b1 = load i512, i512* %bp1
+ %xor2 = xor i512 %a1, %b1
+ %or = or i512 %xor1, %xor2
+ %cmp = icmp eq i512 %or, 0
+ %z = zext i1 %cmp to i32
+ ret i32 %z
+}
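
For reference, a sketch of the libcall form that the *_pair tests model.
Each i512 load covers 64 bytes and each pointer is read twice, so 128 bytes
are compared in total. The function name and the direct memcmp declaration
below are illustrative only, not part of the committed test:

declare i32 @memcmp(i8*, i8*, i64)

define i32 @ne_i512_pair_libcall(i8* %a, i8* %b) {
  ; Compare two adjacent 64-byte blocks behind each pointer (128 bytes).
  %r = call i32 @memcmp(i8* %a, i8* %b, i64 128)
  %cmp = icmp ne i32 %r, 0
  %z = zext i1 %cmp to i32
  ret i32 %z
}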