[llvm] r314584 - [X86] Support v64i8 mulhu/mulhs
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 29 21:21:46 PDT 2017
Author: ctopper
Date: Fri Sep 29 21:21:46 2017
New Revision: 314584
URL: http://llvm.org/viewvc/llvm-project?rev=314584&view=rev
Log:
[X86] Support v64i8 mulhu/mulhs
Implemented by splitting each v64i8 mulhu/mulhs into two v32i8 mulhu/mulhs operations and concatenating the results.
Differential Revision: https://reviews.llvm.org/D38307
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=314584&r1=314583&r2=314584&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 29 21:21:46 2017
@@ -1438,6 +1438,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::MUL, MVT::v64i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
+ setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
+ setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
@@ -21604,7 +21606,8 @@ static SDValue LowerMULH(SDValue Op, con
return Lower256IntArith(Op, DAG);
// Only i8 vectors should need custom lowering after this.
- assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256())) &&
+ assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
+ (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
"Unsupported vector type");
// Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
@@ -21618,6 +21621,11 @@ static SDValue LowerMULH(SDValue Op, con
unsigned ExShift = (ISD::MULHU == Opcode ? ISD::SRL : ISD::SRA);
unsigned ExAVX = (ISD::MULHU == Opcode ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
+ // For 512-bit vectors, split into 256-bit vectors to allow the
+ // sign-extension to occur.
+ if (VT == MVT::v64i8)
+ return Lower512IntArith(Op, DAG);
+
// AVX2 implementations - extend xmm subvectors to ymm.
if (Subtarget.hasInt256()) {
unsigned NumElems = VT.getVectorNumElements();
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll?rev=314584&r1=314583&r2=314584&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll Fri Sep 29 21:21:46 2017
@@ -172,716 +172,26 @@ define <64 x i8> @test_div7_64i8(<64 x i
;
; AVX512BW-LABEL: test_div7_64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512BW-NEXT: movsbl %cl, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %edx
-; AVX512BW-NEXT: shrl $8, %edx
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: movl %ecx, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %cl
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: movzbl %cl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm2
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx
-; AVX512BW-NEXT: movsbl %cl, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %edx
-; AVX512BW-NEXT: shrl $8, %edx
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: movl %ecx, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %cl
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: movzbl %cl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx
-; AVX512BW-NEXT: movsbl %cl, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %edx
-; AVX512BW-NEXT: shrl $8, %edx
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: movl %ecx, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %cl
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: movzbl %cl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX512BW-NEXT: movsbl %cl, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %edx
-; AVX512BW-NEXT: shrl $8, %edx
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: movl %ecx, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %cl
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: movzbl %cl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427]
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512BW-NEXT: vpmovsxbw %ymm3, %zmm3
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm1
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512BW-NEXT: vpxorq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
%res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
ret <64 x i8> %res
@@ -1121,909 +431,36 @@ define <64 x i8> @test_rem7_64i8(<64 x i
;
; AVX512BW-LABEL: test_rem7_64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %edx
-; AVX512BW-NEXT: imull $-109, %edx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movb $7, %dil
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %edx
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %esi
-; AVX512BW-NEXT: imull $-109, %esi, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %sil
-; AVX512BW-NEXT: movzbl %sil, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm2
-; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %esi
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %edx
-; AVX512BW-NEXT: imull $-109, %edx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %esi, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %esi
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %edx
-; AVX512BW-NEXT: imull $-109, %edx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %esi, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %esi
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %edx
-; AVX512BW-NEXT: imull $-109, %edx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %esi, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427]
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512BW-NEXT: vpmovsxbw %ymm3, %zmm3
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm2
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512BW-NEXT: vpxorq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsubb %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm2
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpmullw %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
+; AVX512BW-NEXT: vpmullw %zmm3, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = srem <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
ret <64 x i8> %res
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll?rev=314584&r1=314583&r2=314584&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll Fri Sep 29 21:21:46 2017
@@ -178,588 +178,23 @@ define <64 x i8> @test_div7_64i8(<64 x i
;
; AVX512BW-LABEL: test_div7_64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512BW-NEXT: imull $37, %ecx, %edx
-; AVX512BW-NEXT: shrl $8, %edx
-; AVX512BW-NEXT: subb %dl, %cl
-; AVX512BW-NEXT: shrb %cl
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: shrb $2, %cl
-; AVX512BW-NEXT: movzbl %cl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm2
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx
-; AVX512BW-NEXT: imull $37, %ecx, %edx
-; AVX512BW-NEXT: shrl $8, %edx
-; AVX512BW-NEXT: subb %dl, %cl
-; AVX512BW-NEXT: shrb %cl
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: shrb $2, %cl
-; AVX512BW-NEXT: movzbl %cl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx
-; AVX512BW-NEXT: imull $37, %ecx, %edx
-; AVX512BW-NEXT: shrl $8, %edx
-; AVX512BW-NEXT: subb %dl, %cl
-; AVX512BW-NEXT: shrb %cl
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: shrb $2, %cl
-; AVX512BW-NEXT: movzbl %cl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX512BW-NEXT: imull $37, %ecx, %edx
-; AVX512BW-NEXT: shrl $8, %edx
-; AVX512BW-NEXT: subb %dl, %cl
-; AVX512BW-NEXT: shrb %cl
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: shrb $2, %cl
-; AVX512BW-NEXT: movzbl %cl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = udiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
ret <64 x i8> %res
@@ -1005,781 +440,33 @@ define <64 x i8> @test_rem7_64i8(<64 x i
;
; AVX512BW-LABEL: test_rem7_64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT: vpextrb $1, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movb $7, %cl
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %edx
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %esi
-; AVX512BW-NEXT: imull $37, %esi, %edi
-; AVX512BW-NEXT: shrl $8, %edi
-; AVX512BW-NEXT: movl %esi, %eax
-; AVX512BW-NEXT: subb %dil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %dil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %sil
-; AVX512BW-NEXT: movzbl %sil, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm2
-; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $2, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $3, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $4, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $5, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $6, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $7, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $8, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $9, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $10, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $11, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $12, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $13, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $14, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $15, %xmm1, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %edx
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi
-; AVX512BW-NEXT: imull $37, %esi, %edi
-; AVX512BW-NEXT: shrl $8, %edi
-; AVX512BW-NEXT: movl %esi, %eax
-; AVX512BW-NEXT: subb %dil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %dil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %sil
-; AVX512BW-NEXT: movzbl %sil, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %edx
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi
-; AVX512BW-NEXT: imull $37, %esi, %edi
-; AVX512BW-NEXT: shrl $8, %edi
-; AVX512BW-NEXT: movl %esi, %eax
-; AVX512BW-NEXT: subb %dil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %dil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %sil
-; AVX512BW-NEXT: movzbl %sil, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %edx
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
-; AVX512BW-NEXT: imull $37, %esi, %edi
-; AVX512BW-NEXT: shrl $8, %edi
-; AVX512BW-NEXT: movl %esi, %eax
-; AVX512BW-NEXT: subb %dil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %dil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %sil
-; AVX512BW-NEXT: movzbl %sil, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm3
-; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm0, %edx
-; AVX512BW-NEXT: imull $37, %edx, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
-; AVX512BW-NEXT: subb %sil, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %sil, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: mulb %cl
-; AVX512BW-NEXT: subb %al, %dl
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrlw $1, %zmm2, %zmm2
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm2
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpmullw %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
+; AVX512BW-NEXT: vpmullw %zmm3, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = urem <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
ret <64 x i8> %res
More information about the llvm-commits
mailing list