[llvm] r275767 - [X86] Add AVX512 instructions to X86InstrInfo::isAssociativeAndCommutative.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 17 23:14:48 PDT 2016


Author: ctopper
Date: Mon Jul 18 01:14:47 2016
New Revision: 275767

URL: http://llvm.org/viewvc/llvm-project?rev=275767&view=rev
Log:
[X86] Add AVX512 instructions to X86InstrInfo::isAssociativeAndCommutative.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=275767&r1=275766&r2=275767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Jul 18 01:14:47 2016
@@ -7645,10 +7645,28 @@ bool X86InstrInfo::isAssociativeAndCommu
   case X86::PXORrr:
   case X86::VPANDrr:
   case X86::VPANDYrr:
+  case X86::VPANDDZ128rr:
+  case X86::VPANDDZ256rr:
+  case X86::VPANDDZrr:
+  case X86::VPANDQZ128rr:
+  case X86::VPANDQZ256rr:
+  case X86::VPANDQZrr:
   case X86::VPORrr:
   case X86::VPORYrr:
+  case X86::VPORDZ128rr:
+  case X86::VPORDZ256rr:
+  case X86::VPORDZrr:
+  case X86::VPORQZ128rr:
+  case X86::VPORQZ256rr:
+  case X86::VPORQZrr:
   case X86::VPXORrr:
   case X86::VPXORYrr:
+  case X86::VPXORDZ128rr:
+  case X86::VPXORDZ256rr:
+  case X86::VPXORDZrr:
+  case X86::VPXORQZ128rr:
+  case X86::VPXORQZ256rr:
+  case X86::VPXORQZrr:
   // Normal min/max instructions are not commutative because of NaN and signed
   // zero semantics, but these are. Thus, there's no need to check for global
   // relaxed math; the instructions themselves have the properties we need.
@@ -7664,14 +7682,30 @@ bool X86InstrInfo::isAssociativeAndCommu
   case X86::VMAXCPSrr:
   case X86::VMAXCPDYrr:
   case X86::VMAXCPSYrr:
+  case X86::VMAXCPDZ128rr:
+  case X86::VMAXCPSZ128rr:
+  case X86::VMAXCPDZ256rr:
+  case X86::VMAXCPSZ256rr:
+  case X86::VMAXCPDZrr:
+  case X86::VMAXCPSZrr:
   case X86::VMAXCSDrr:
   case X86::VMAXCSSrr:
+  case X86::VMAXCSDZrr:
+  case X86::VMAXCSSZrr:
   case X86::VMINCPDrr:
   case X86::VMINCPSrr:
   case X86::VMINCPDYrr:
   case X86::VMINCPSYrr:
+  case X86::VMINCPDZ128rr:
+  case X86::VMINCPSZ128rr:
+  case X86::VMINCPDZ256rr:
+  case X86::VMINCPSZ256rr:
+  case X86::VMINCPDZrr:
+  case X86::VMINCPSZrr:
   case X86::VMINCSDrr:
   case X86::VMINCSSrr:
+  case X86::VMINCSDZrr:
+  case X86::VMINCSSZrr:
     return true;
   case X86::ADDPDrr:
   case X86::ADDPSrr:
@@ -7685,14 +7719,30 @@ bool X86InstrInfo::isAssociativeAndCommu
   case X86::VADDPSrr:
   case X86::VADDPDYrr:
   case X86::VADDPSYrr:
+  case X86::VADDPDZ128rr:
+  case X86::VADDPSZ128rr:
+  case X86::VADDPDZ256rr:
+  case X86::VADDPSZ256rr:
+  case X86::VADDPDZrr:
+  case X86::VADDPSZrr:
   case X86::VADDSDrr:
   case X86::VADDSSrr:
+  case X86::VADDSDZrr:
+  case X86::VADDSSZrr:
   case X86::VMULPDrr:
   case X86::VMULPSrr:
   case X86::VMULPDYrr:
   case X86::VMULPSYrr:
+  case X86::VMULPDZ128rr:
+  case X86::VMULPSZ128rr:
+  case X86::VMULPDZ256rr:
+  case X86::VMULPSZ256rr:
+  case X86::VMULPDZrr:
+  case X86::VMULPSZrr:
   case X86::VMULSDrr:
   case X86::VMULSSrr:
+  case X86::VMULSDZrr:
+  case X86::VMULSSZrr:
     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
   default:
     return false;

Modified: llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll?rev=275767&r1=275766&r2=275767&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll Mon Jul 18 01:14:47 2016
@@ -3081,99 +3081,99 @@ define <16 x i32> @test_bitreverse_v16i3
 ; AVX512F-LABEL: test_bitreverse_v16i32:
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    vpslld $29, %zmm0, %zmm1
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm2
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm1
 ; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm1, %zmm2, %zmm1
-; AVX512F-NEXT:    vpslld $27, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $25, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $23, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $21, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $19, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $17, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $15, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $13, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $11, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $9, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $7, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $5, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $3, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpslld $1, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $1, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $3, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $5, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $7, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $9, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $11, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $13, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $15, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $17, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $19, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $21, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $23, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $25, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $27, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrld $29, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT:    vpord %zmm2, %zmm1, %zmm1
+; AVX512F-NEXT:    vpslld $27, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $25, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $23, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $21, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $19, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $17, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $15, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $13, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $11, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $9, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $7, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $5, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $3, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpslld $1, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $1, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $3, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $5, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $7, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $9, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $11, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $13, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $15, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $17, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $19, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $21, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $23, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $25, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $27, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrld $29, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm3, %zmm3
+; AVX512F-NEXT:    vpord %zmm3, %zmm2, %zmm2
 ; AVX512F-NEXT:    vpsrld $31, %zmm0, %zmm0
 ; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512F-NEXT:    vpord %zmm0, %zmm2, %zmm0
 ; AVX512F-NEXT:    vpord %zmm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -3516,195 +3516,195 @@ define <8 x i64> @test_bitreverse_v8i64(
 ; AVX512F-LABEL: test_bitreverse_v8i64:
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    vpsllq $61, %zmm0, %zmm1
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm2
+; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm1
 ; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm1, %zmm2, %zmm1
-; AVX512F-NEXT:    vpsllq $59, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $57, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $55, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $53, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $51, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $49, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $47, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $45, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $43, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $41, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $39, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $37, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $35, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $33, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $31, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $29, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $27, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $25, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $23, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $21, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $19, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $17, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $15, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $13, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $11, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $9, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $7, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $5, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $3, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsllq $1, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $1, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $3, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $5, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $7, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $9, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $11, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $13, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $15, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $17, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $19, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $21, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $23, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $25, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $27, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $29, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $31, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $33, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $35, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $37, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $39, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $41, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $43, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $45, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $47, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $49, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $51, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $53, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $55, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $57, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $59, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vpsrlq $61, %zmm0, %zmm2
-; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm1, %zmm1
+; AVX512F-NEXT:    vpsllq $59, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $57, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $55, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $53, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $51, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $49, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $47, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $45, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $43, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $41, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $39, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $37, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $35, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $33, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $31, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $29, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $27, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $25, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $23, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $21, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $19, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $17, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $15, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $13, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $11, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $9, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $7, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $5, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $3, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsllq $1, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $1, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $3, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $5, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $7, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $9, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $11, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $13, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $15, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $17, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $19, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $21, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $23, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $25, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $27, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $29, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $31, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $33, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $35, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $37, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $39, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $41, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $43, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $45, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $47, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $49, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $51, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $53, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $55, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $57, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $59, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT:    vpsrlq $61, %zmm0, %zmm3
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm3, %zmm3
+; AVX512F-NEXT:    vporq %zmm3, %zmm2, %zmm2
 ; AVX512F-NEXT:    vpsrlq $63, %zmm0, %zmm0
 ; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512F-NEXT:    vporq %zmm0, %zmm2, %zmm0
 ; AVX512F-NEXT:    vporq %zmm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;




More information about the llvm-commits mailing list