[llvm] r275767 - [X86] Add AVX512 instructions to X86InstrInfo::isAssociativeAndCommutative.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 17 23:14:48 PDT 2016
Author: ctopper
Date: Mon Jul 18 01:14:47 2016
New Revision: 275767
URL: http://llvm.org/viewvc/llvm-project?rev=275767&view=rev
Log:
[X86] Add AVX512 instructions to X86InstrInfo::isAssociativeAndCommutative.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=275767&r1=275766&r2=275767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Jul 18 01:14:47 2016
@@ -7645,10 +7645,28 @@ bool X86InstrInfo::isAssociativeAndCommu
case X86::PXORrr:
case X86::VPANDrr:
case X86::VPANDYrr:
+ case X86::VPANDDZ128rr:
+ case X86::VPANDDZ256rr:
+ case X86::VPANDDZrr:
+ case X86::VPANDQZ128rr:
+ case X86::VPANDQZ256rr:
+ case X86::VPANDQZrr:
case X86::VPORrr:
case X86::VPORYrr:
+ case X86::VPORDZ128rr:
+ case X86::VPORDZ256rr:
+ case X86::VPORDZrr:
+ case X86::VPORQZ128rr:
+ case X86::VPORQZ256rr:
+ case X86::VPORQZrr:
case X86::VPXORrr:
case X86::VPXORYrr:
+ case X86::VPXORDZ128rr:
+ case X86::VPXORDZ256rr:
+ case X86::VPXORDZrr:
+ case X86::VPXORQZ128rr:
+ case X86::VPXORQZ256rr:
+ case X86::VPXORQZrr:
// Normal min/max instructions are not commutative because of NaN and signed
// zero semantics, but these are. Thus, there's no need to check for global
// relaxed math; the instructions themselves have the properties we need.
@@ -7664,14 +7682,30 @@ bool X86InstrInfo::isAssociativeAndCommu
case X86::VMAXCPSrr:
case X86::VMAXCPDYrr:
case X86::VMAXCPSYrr:
+ case X86::VMAXCPDZ128rr:
+ case X86::VMAXCPSZ128rr:
+ case X86::VMAXCPDZ256rr:
+ case X86::VMAXCPSZ256rr:
+ case X86::VMAXCPDZrr:
+ case X86::VMAXCPSZrr:
case X86::VMAXCSDrr:
case X86::VMAXCSSrr:
+ case X86::VMAXCSDZrr:
+ case X86::VMAXCSSZrr:
case X86::VMINCPDrr:
case X86::VMINCPSrr:
case X86::VMINCPDYrr:
case X86::VMINCPSYrr:
+ case X86::VMINCPDZ128rr:
+ case X86::VMINCPSZ128rr:
+ case X86::VMINCPDZ256rr:
+ case X86::VMINCPSZ256rr:
+ case X86::VMINCPDZrr:
+ case X86::VMINCPSZrr:
case X86::VMINCSDrr:
case X86::VMINCSSrr:
+ case X86::VMINCSDZrr:
+ case X86::VMINCSSZrr:
return true;
case X86::ADDPDrr:
case X86::ADDPSrr:
@@ -7685,14 +7719,30 @@ bool X86InstrInfo::isAssociativeAndCommu
case X86::VADDPSrr:
case X86::VADDPDYrr:
case X86::VADDPSYrr:
+ case X86::VADDPDZ128rr:
+ case X86::VADDPSZ128rr:
+ case X86::VADDPDZ256rr:
+ case X86::VADDPSZ256rr:
+ case X86::VADDPDZrr:
+ case X86::VADDPSZrr:
case X86::VADDSDrr:
case X86::VADDSSrr:
+ case X86::VADDSDZrr:
+ case X86::VADDSSZrr:
case X86::VMULPDrr:
case X86::VMULPSrr:
case X86::VMULPDYrr:
case X86::VMULPSYrr:
+ case X86::VMULPDZ128rr:
+ case X86::VMULPSZ128rr:
+ case X86::VMULPDZ256rr:
+ case X86::VMULPSZ256rr:
+ case X86::VMULPDZrr:
+ case X86::VMULPSZrr:
case X86::VMULSDrr:
case X86::VMULSSrr:
+ case X86::VMULSDZrr:
+ case X86::VMULSSZrr:
return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
default:
return false;
Modified: llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll?rev=275767&r1=275766&r2=275767&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll Mon Jul 18 01:14:47 2016
@@ -3081,99 +3081,99 @@ define <16 x i32> @test_bitreverse_v16i3
; AVX512F-LABEL: test_bitreverse_v16i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpslld $29, %zmm0, %zmm1
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm2
+; AVX512F-NEXT: vpslld $31, %zmm0, %zmm1
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm1, %zmm2, %zmm1
-; AVX512F-NEXT: vpslld $27, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $25, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $23, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $21, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $19, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $17, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $15, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $13, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $11, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $9, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $7, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $5, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $3, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpslld $1, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $3, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $5, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $7, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $9, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $11, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $13, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $15, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $17, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $19, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $21, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $23, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $25, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $27, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrld $29, %zmm0, %zmm2
-; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm1, %zmm1
+; AVX512F-NEXT: vpslld $27, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $25, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $23, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $21, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $19, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $17, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $15, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $13, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $11, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $9, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $7, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $5, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $3, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpslld $1, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $3, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $5, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $7, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $9, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $11, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $13, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $15, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $17, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $19, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $21, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $23, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $25, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $27, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrld $29, %zmm0, %zmm3
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT: vpord %zmm3, %zmm2, %zmm2
; AVX512F-NEXT: vpsrld $31, %zmm0, %zmm0
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512F-NEXT: vpord %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: vpord %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
@@ -3516,195 +3516,195 @@ define <8 x i64> @test_bitreverse_v8i64(
; AVX512F-LABEL: test_bitreverse_v8i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpsllq $61, %zmm0, %zmm1
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm2
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm1
; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm1, %zmm2, %zmm1
-; AVX512F-NEXT: vpsllq $59, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $57, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $55, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $53, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $51, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $49, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $47, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $45, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $43, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $41, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $39, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $37, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $35, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $33, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $31, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $29, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $27, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $25, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $23, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $21, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $19, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $17, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $15, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $13, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $11, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $9, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $7, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $5, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $3, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllq $1, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $1, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $3, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $5, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $7, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $9, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $11, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $13, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $15, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $17, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $19, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $21, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $23, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $25, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $27, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $29, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $31, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $33, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $35, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $37, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $39, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $41, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $43, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $45, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $47, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $49, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $51, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $53, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $55, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $57, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $59, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq $61, %zmm0, %zmm2
-; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm2, %zmm1, %zmm1
+; AVX512F-NEXT: vpsllq $59, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $57, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $55, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $53, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $51, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $49, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $47, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $45, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $43, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $41, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $39, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $37, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $35, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $33, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $31, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $29, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $27, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $25, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $23, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $21, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $19, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $17, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $15, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $13, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $11, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $9, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $7, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $5, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $3, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllq $1, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $1, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $3, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $5, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $7, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $9, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $11, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $13, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $15, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $17, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $19, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $21, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $23, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $25, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $27, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $29, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $31, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $33, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $35, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $37, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $39, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $41, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $43, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $45, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $47, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $49, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $51, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $53, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $55, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $57, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $59, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $61, %zmm0, %zmm3
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT: vporq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlq $63, %zmm0, %zmm0
; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
More information about the llvm-commits mailing list