[llvm] r314584 - [X86] Support v64i8 mulhu/mulhs

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 29 21:21:46 PDT 2017


Author: ctopper
Date: Fri Sep 29 21:21:46 2017
New Revision: 314584

URL: http://llvm.org/viewvc/llvm-project?rev=314584&view=rev
Log:
[X86] Support v64i8 mulhu/mulhs

Implemented by splitting into two v32i8 mulhu/mulhs and concatenating the results.

Differential Revision: https://reviews.llvm.org/D38307

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=314584&r1=314583&r2=314584&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 29 21:21:46 2017
@@ -1438,6 +1438,8 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::MUL,                MVT::v64i8, Custom);
     setOperationAction(ISD::MULHS,              MVT::v32i16, Legal);
     setOperationAction(ISD::MULHU,              MVT::v32i16, Legal);
+    setOperationAction(ISD::MULHS,              MVT::v64i8, Custom);
+    setOperationAction(ISD::MULHU,              MVT::v64i8, Custom);
     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i1, Custom);
     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i1, Custom);
     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i16, Custom);
@@ -21604,7 +21606,8 @@ static SDValue LowerMULH(SDValue Op, con
     return Lower256IntArith(Op, DAG);
 
   // Only i8 vectors should need custom lowering after this.
-  assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256())) &&
+  assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
+         (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
          "Unsupported vector type");
 
   // Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
@@ -21618,6 +21621,11 @@ static SDValue LowerMULH(SDValue Op, con
   unsigned ExShift = (ISD::MULHU == Opcode ? ISD::SRL : ISD::SRA);
   unsigned ExAVX = (ISD::MULHU == Opcode ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
 
+  // For 512-bit vectors, split into 256-bit vectors to allow the
+  // sign- or zero-extension (selected by ExAVX) to occur.
+  if (VT == MVT::v64i8)
+    return Lower512IntArith(Op, DAG);
+
   // AVX2 implementations - extend xmm subvectors to ymm.
   if (Subtarget.hasInt256()) {
     unsigned NumElems = VT.getVectorNumElements();

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll?rev=314584&r1=314583&r2=314584&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll Fri Sep 29 21:21:46 2017
@@ -172,716 +172,26 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ;
 ; AVX512BW-LABEL: test_div7_64i8:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT:    vpextrb $1, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX512BW-NEXT:    movsbl %cl, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %edx
-; AVX512BW-NEXT:    shrl $8, %edx
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movl %ecx, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %cl
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movzbl %cl, %ecx
-; AVX512BW-NEXT:    vmovd %ecx, %xmm2
-; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $2, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $3, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $4, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $5, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $6, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $7, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $8, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $9, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $10, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $11, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $12, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $13, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $14, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $15, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpextrb $0, %xmm2, %ecx
-; AVX512BW-NEXT:    movsbl %cl, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %edx
-; AVX512BW-NEXT:    shrl $8, %edx
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movl %ecx, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %cl
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movzbl %cl, %ecx
-; AVX512BW-NEXT:    vmovd %ecx, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpextrb $0, %xmm2, %ecx
-; AVX512BW-NEXT:    movsbl %cl, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %edx
-; AVX512BW-NEXT:    shrl $8, %edx
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movl %ecx, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %cl
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movzbl %cl, %ecx
-; AVX512BW-NEXT:    vmovd %ecx, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpextrb $0, %xmm0, %ecx
-; AVX512BW-NEXT:    movsbl %cl, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %edx
-; AVX512BW-NEXT:    shrl $8, %edx
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movl %ecx, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %cl
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movzbl %cl, %ecx
-; AVX512BW-NEXT:    vmovd %ecx, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %eax
-; AVX512BW-NEXT:    imull $-109, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm0
-; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm1
+; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427]
+; AVX512BW-NEXT:    vpmullw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512BW-NEXT:    vpmovsxbw %ymm3, %zmm3
+; AVX512BW-NEXT:    vpmullw %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512BW-NEXT:    vpxorq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsrlw $7, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
   %res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <64 x i8> %res
@@ -1121,909 +431,36 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ;
 ; AVX512BW-LABEL: test_rem7_64i8:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT:    vpextrb $1, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %edx
-; AVX512BW-NEXT:    imull $-109, %edx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb $7, %dil
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %edx
-; AVX512BW-NEXT:    vpextrb $0, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %esi
-; AVX512BW-NEXT:    imull $-109, %esi, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %sil
-; AVX512BW-NEXT:    movzbl %sil, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm2
-; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $2, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $3, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $4, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $5, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $6, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $7, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $8, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $9, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $10, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $11, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $12, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $13, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $14, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $15, %xmm1, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %esi
-; AVX512BW-NEXT:    vpextrb $0, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %edx
-; AVX512BW-NEXT:    imull $-109, %edx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %esi, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %esi
-; AVX512BW-NEXT:    vpextrb $0, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %edx
-; AVX512BW-NEXT:    imull $-109, %edx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %esi, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %esi
-; AVX512BW-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %edx
-; AVX512BW-NEXT:    imull $-109, %edx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    shrb $7, %cl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %esi, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm0, %eax
-; AVX512BW-NEXT:    movsbl %al, %ecx
-; AVX512BW-NEXT:    imull $-109, %ecx, %eax
-; AVX512BW-NEXT:    shrl $8, %eax
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    shrb $7, %dl
-; AVX512BW-NEXT:    sarb $2, %al
-; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
-; AVX512BW-NEXT:    mulb %dil
-; AVX512BW-NEXT:    subb %al, %cl
-; AVX512BW-NEXT:    movzbl %cl, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm0
-; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm1
+; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427]
+; AVX512BW-NEXT:    vpmullw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512BW-NEXT:    vpmovsxbw %ymm3, %zmm3
+; AVX512BW-NEXT:    vpmullw %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsrlw $2, %zmm1, %zmm2
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512BW-NEXT:    vpxorq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpsubb %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpsrlw $7, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT:    vpaddb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT:    vpmovsxbw %ymm1, %zmm2
+; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT:    vpmullw %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; AVX512BW-NEXT:    vpmovsxbw %ymm1, %zmm1
+; AVX512BW-NEXT:    vpmullw %zmm3, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm1
+; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
   %res = srem <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <64 x i8> %res

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll?rev=314584&r1=314583&r2=314584&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll Fri Sep 29 21:21:46 2017
@@ -178,588 +178,23 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ;
 ; AVX512BW-LABEL: test_div7_64i8:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT:    vpextrb $1, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX512BW-NEXT:    imull $37, %ecx, %edx
-; AVX512BW-NEXT:    shrl $8, %edx
-; AVX512BW-NEXT:    subb %dl, %cl
-; AVX512BW-NEXT:    shrb %cl
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    shrb $2, %cl
-; AVX512BW-NEXT:    movzbl %cl, %ecx
-; AVX512BW-NEXT:    vmovd %ecx, %xmm2
-; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $2, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $3, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $4, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $5, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $6, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $7, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $8, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $9, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $10, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $11, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $12, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $13, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $14, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $15, %xmm1, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpextrb $0, %xmm2, %ecx
-; AVX512BW-NEXT:    imull $37, %ecx, %edx
-; AVX512BW-NEXT:    shrl $8, %edx
-; AVX512BW-NEXT:    subb %dl, %cl
-; AVX512BW-NEXT:    shrb %cl
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    shrb $2, %cl
-; AVX512BW-NEXT:    movzbl %cl, %ecx
-; AVX512BW-NEXT:    vmovd %ecx, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpextrb $0, %xmm2, %ecx
-; AVX512BW-NEXT:    imull $37, %ecx, %edx
-; AVX512BW-NEXT:    shrl $8, %edx
-; AVX512BW-NEXT:    subb %dl, %cl
-; AVX512BW-NEXT:    shrb %cl
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    shrb $2, %cl
-; AVX512BW-NEXT:    movzbl %cl, %ecx
-; AVX512BW-NEXT:    vmovd %ecx, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpextrb $0, %xmm0, %ecx
-; AVX512BW-NEXT:    imull $37, %ecx, %edx
-; AVX512BW-NEXT:    shrl $8, %edx
-; AVX512BW-NEXT:    subb %dl, %cl
-; AVX512BW-NEXT:    shrb %cl
-; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    shrb $2, %cl
-; AVX512BW-NEXT:    movzbl %cl, %ecx
-; AVX512BW-NEXT:    vmovd %ecx, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm0, %eax
-; AVX512BW-NEXT:    imull $37, %eax, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movzbl %al, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm0
-; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512BW-NEXT:    vpmullw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
+; AVX512BW-NEXT:    vpmullw %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
   %res = udiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <64 x i8> %res
@@ -1005,781 +440,33 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ;
 ; AVX512BW-LABEL: test_rem7_64i8:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT:    vpextrb $1, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %ecx
-; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %cl, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    movb $7, %cl
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %edx
-; AVX512BW-NEXT:    vpextrb $0, %xmm1, %esi
-; AVX512BW-NEXT:    imull $37, %esi, %edi
-; AVX512BW-NEXT:    shrl $8, %edi
-; AVX512BW-NEXT:    movl %esi, %eax
-; AVX512BW-NEXT:    subb %dil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %dil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %sil
-; AVX512BW-NEXT:    movzbl %sil, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm2
-; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $2, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $3, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $4, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $5, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $6, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $7, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $8, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $9, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $10, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $11, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $12, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $13, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $14, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpextrb $15, %xmm1, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %edx
-; AVX512BW-NEXT:    vpextrb $0, %xmm2, %esi
-; AVX512BW-NEXT:    imull $37, %esi, %edi
-; AVX512BW-NEXT:    shrl $8, %edi
-; AVX512BW-NEXT:    movl %esi, %eax
-; AVX512BW-NEXT:    subb %dil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %dil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %sil
-; AVX512BW-NEXT:    movzbl %sil, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %edx
-; AVX512BW-NEXT:    vpextrb $0, %xmm2, %esi
-; AVX512BW-NEXT:    imull $37, %esi, %edi
-; AVX512BW-NEXT:    shrl $8, %edi
-; AVX512BW-NEXT:    movl %esi, %eax
-; AVX512BW-NEXT:    subb %dil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %dil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %sil
-; AVX512BW-NEXT:    movzbl %sil, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm2, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT:    vpextrb $1, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %edx
-; AVX512BW-NEXT:    vpextrb $0, %xmm0, %esi
-; AVX512BW-NEXT:    imull $37, %esi, %edi
-; AVX512BW-NEXT:    shrl $8, %edi
-; AVX512BW-NEXT:    movl %esi, %eax
-; AVX512BW-NEXT:    subb %dil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %dil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %sil
-; AVX512BW-NEXT:    movzbl %sil, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm3
-; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $2, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $3, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $4, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $5, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $6, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $7, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $8, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $9, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $10, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $11, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $12, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $13, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $14, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT:    vpextrb $15, %xmm0, %edx
-; AVX512BW-NEXT:    imull $37, %edx, %esi
-; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movl %edx, %eax
-; AVX512BW-NEXT:    subb %sil, %al
-; AVX512BW-NEXT:    shrb %al
-; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    shrb $2, %al
-; AVX512BW-NEXT:    mulb %cl
-; AVX512BW-NEXT:    subb %al, %dl
-; AVX512BW-NEXT:    movzbl %dl, %eax
-; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm0
-; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512BW-NEXT:    vpmullw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
+; AVX512BW-NEXT:    vpmullw %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpsrlw $1, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT:    vpaddb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT:    vpsrlw $2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT:    vpmovsxbw %ymm1, %zmm2
+; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT:    vpmullw %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; AVX512BW-NEXT:    vpmovsxbw %ymm1, %zmm1
+; AVX512BW-NEXT:    vpmullw %zmm3, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm1
+; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
   %res = urem <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <64 x i8> %res




More information about the llvm-commits mailing list