[llvm] [LegalizeIntegerTypes] Add `PromoteIntOp_ANY_EXTEND_VECTOR_INREG` (PR #178144)

Abhishek Kaushik via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 27 01:43:17 PST 2026


abhishek-kaushik22 wrote:

```diff
diff --git a/llvm/test/CodeGen/X86/pr161013.ll b/llvm/test/CodeGen/X86/pr161013.ll
index 2e805047f584..f78d64a16c57 100644
--- a/llvm/test/CodeGen/X86/pr161013.ll
+++ b/llvm/test/CodeGen/X86/pr161013.ll
@@ -5,392 +5,10 @@
 
 
 define <32 x i4> @avir_v4i4_to_v32i4(<4 x i4> %arg) {
-; AVX1-LABEL: avir_v4i4_to_v32i4:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; AVX1-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $4, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    movl %ecx, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    vmovd %esi, %xmm0
-; AVX1-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $8, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $12, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $4, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $20, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $5, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $24, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $6, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $28, %edx
-; AVX1-NEXT:    vpinsrb $7, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rcx, %rdx
-; AVX1-NEXT:    shrq $32, %rdx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rcx, %rdx
-; AVX1-NEXT:    shrq $36, %rdx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $9, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rcx, %rdx
-; AVX1-NEXT:    shrq $40, %rdx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $10, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rcx, %rdx
-; AVX1-NEXT:    shrq $44, %rdx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $11, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rcx, %rdx
-; AVX1-NEXT:    shrq $48, %rdx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rcx, %rdx
-; AVX1-NEXT:    shrq $52, %rdx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $13, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rcx, %rdx
-; AVX1-NEXT:    shrq $56, %rdx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpinsrb $14, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    shrq $60, %rcx
-; AVX1-NEXT:    vpinsrb $15, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $4, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vmovd %edx, %xmm1
-; AVX1-NEXT:    vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $8, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $12, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $3, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $4, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $20, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $5, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $24, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $6, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $28, %ecx
-; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $32, %rcx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $8, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $36, %rcx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $40, %rcx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $44, %rcx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $48, %rcx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $52, %rcx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $56, %rcx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    shrq $60, %rax
-; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: avir_v4i4_to_v32i4:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX2-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; AVX2-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
-; AVX2-NEXT:    movl %ecx, %edx
-; AVX2-NEXT:    shrl $4, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    movl %ecx, %esi
-; AVX2-NEXT:    andl $15, %esi
-; AVX2-NEXT:    vmovd %esi, %xmm0
-; AVX2-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movl %ecx, %edx
-; AVX2-NEXT:    shrl $8, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movl %ecx, %edx
-; AVX2-NEXT:    shrl $12, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movl %ecx, %edx
-; AVX2-NEXT:    shrl $16, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $4, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movl %ecx, %edx
-; AVX2-NEXT:    shrl $20, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $5, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movl %ecx, %edx
-; AVX2-NEXT:    shrl $24, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $6, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movl %ecx, %edx
-; AVX2-NEXT:    shrl $28, %edx
-; AVX2-NEXT:    vpinsrb $7, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rcx, %rdx
-; AVX2-NEXT:    shrq $32, %rdx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rcx, %rdx
-; AVX2-NEXT:    shrq $36, %rdx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $9, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rcx, %rdx
-; AVX2-NEXT:    shrq $40, %rdx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $10, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rcx, %rdx
-; AVX2-NEXT:    shrq $44, %rdx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $11, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rcx, %rdx
-; AVX2-NEXT:    shrq $48, %rdx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rcx, %rdx
-; AVX2-NEXT:    shrq $52, %rdx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $13, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rcx, %rdx
-; AVX2-NEXT:    shrq $56, %rdx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpinsrb $14, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    shrq $60, %rcx
-; AVX2-NEXT:    vpinsrb $15, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $4, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    movl %eax, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vmovd %edx, %xmm1
-; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $8, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $12, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $16, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $20, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $24, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $28, %ecx
-; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $32, %rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $36, %rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $40, %rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $44, %rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $48, %rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $52, %rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $56, %rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    shrq $60, %rax
-; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: avir_v4i4_to_v32i4:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; AVX512-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
-; AVX512-NEXT:    movl %ecx, %edx
-; AVX512-NEXT:    shrl $4, %edx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    movl %ecx, %esi
-; AVX512-NEXT:    andl $15, %esi
-; AVX512-NEXT:    vmovd %esi, %xmm0
-; AVX512-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movl %ecx, %edx
-; AVX512-NEXT:    shrl $8, %edx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movl %ecx, %edx
-; AVX512-NEXT:    shrl $12, %edx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movl %ecx, %edx
-; AVX512-NEXT:    shrl $16, %edx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $4, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movl %ecx, %edx
-; AVX512-NEXT:    shrl $20, %edx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $5, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movl %ecx, %edx
-; AVX512-NEXT:    shrl $24, %edx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $6, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movl %ecx, %edx
-; AVX512-NEXT:    shrl $28, %edx
-; AVX512-NEXT:    vpinsrb $7, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movq %rcx, %rdx
-; AVX512-NEXT:    shrq $32, %rdx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movq %rcx, %rdx
-; AVX512-NEXT:    shrq $36, %rdx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $9, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movq %rcx, %rdx
-; AVX512-NEXT:    shrq $40, %rdx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $10, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movq %rcx, %rdx
-; AVX512-NEXT:    shrq $44, %rdx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $11, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movq %rcx, %rdx
-; AVX512-NEXT:    shrq $48, %rdx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movq %rcx, %rdx
-; AVX512-NEXT:    shrq $52, %rdx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $13, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    movq %rcx, %rdx
-; AVX512-NEXT:    shrq $56, %rdx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vpinsrb $14, %edx, %xmm0, %xmm0
-; AVX512-NEXT:    shrq $60, %rcx
-; AVX512-NEXT:    vpinsrb $15, %ecx, %xmm0, %xmm0
-; AVX512-NEXT:    movl %eax, %ecx
-; AVX512-NEXT:    shrl $4, %ecx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    movl %eax, %edx
-; AVX512-NEXT:    andl $15, %edx
-; AVX512-NEXT:    vmovd %edx, %xmm1
-; AVX512-NEXT:    vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movl %eax, %ecx
-; AVX512-NEXT:    shrl $8, %ecx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movl %eax, %ecx
-; AVX512-NEXT:    shrl $12, %ecx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $3, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movl %eax, %ecx
-; AVX512-NEXT:    shrl $16, %ecx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $4, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movl %eax, %ecx
-; AVX512-NEXT:    shrl $20, %ecx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $5, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movl %eax, %ecx
-; AVX512-NEXT:    shrl $24, %ecx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $6, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movl %eax, %ecx
-; AVX512-NEXT:    shrl $28, %ecx
-; AVX512-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movq %rax, %rcx
-; AVX512-NEXT:    shrq $32, %rcx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $8, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movq %rax, %rcx
-; AVX512-NEXT:    shrq $36, %rcx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $9, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movq %rax, %rcx
-; AVX512-NEXT:    shrq $40, %rcx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $10, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movq %rax, %rcx
-; AVX512-NEXT:    shrq $44, %rcx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $11, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movq %rax, %rcx
-; AVX512-NEXT:    shrq $48, %rcx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $12, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movq %rax, %rcx
-; AVX512-NEXT:    shrq $52, %rcx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    movq %rax, %rcx
-; AVX512-NEXT:    shrq $56, %rcx
-; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpinsrb $14, %ecx, %xmm1, %xmm1
-; AVX512-NEXT:    shrq $60, %rax
-; AVX512-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX512-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: avir_v4i4_to_v32i4:
+; AVX:       # %bb.0:
+; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX-NEXT:    retq
   %res = shufflevector <4 x i4> %arg, <4 x i4> poison,
   <32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1     , i32 poison, i32 poison, i32 poison,
               i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
@@ -403,8 +21,22 @@ define <64 x i4> @avir_v4i4_to_v64i4(<4 x i4> %arg) {
 ; AVX-LABEL: avir_v4i4_to_v64i4:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movq %rdi, %rax
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
+; AVX-NEXT:    vmovd %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    vpextrb $4, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $8, %edx
+; AVX-NEXT:    orl %ecx, %edx
+; AVX-NEXT:    vpextrb $8, %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    shll $16, %ecx
+; AVX-NEXT:    orl %edx, %ecx
+; AVX-NEXT:    vpextrb $12, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $24, %edx
+; AVX-NEXT:    orl %ecx, %edx
+; AVX-NEXT:    movq %rdx, (%rdi)
+; AVX-NEXT:    movq $0, 8(%rdi)
 ; AVX-NEXT:    retq
   %res = shufflevector <4 x i4> %arg, <4 x i4> poison,
   <64 x i32> <i32 0     , i32 poison, i32 1     , i32 poison, i32 2     , i32 poison, i32 3     , i32 poison,
@@ -422,8 +54,68 @@ define <64 x i4> @avir_v8i4_to_v64i4(<8 x i4> %arg) {
 ; AVX-LABEL: avir_v8i4_to_v64i4:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movq %rdi, %rax
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
+; AVX-NEXT:    vpextrb $8, %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    shlq $32, %rcx
+; AVX-NEXT:    vmovd %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    vpextrb $1, %xmm0, %esi
+; AVX-NEXT:    andl $15, %esi
+; AVX-NEXT:    shll $4, %esi
+; AVX-NEXT:    orl %edx, %esi
+; AVX-NEXT:    vpextrb $2, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $8, %edx
+; AVX-NEXT:    orl %esi, %edx
+; AVX-NEXT:    vpextrb $3, %xmm0, %esi
+; AVX-NEXT:    andl $15, %esi
+; AVX-NEXT:    shll $12, %esi
+; AVX-NEXT:    orl %edx, %esi
+; AVX-NEXT:    vpextrb $4, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $16, %edx
+; AVX-NEXT:    orl %esi, %edx
+; AVX-NEXT:    vpextrb $5, %xmm0, %esi
+; AVX-NEXT:    andl $15, %esi
+; AVX-NEXT:    shll $20, %esi
+; AVX-NEXT:    orl %edx, %esi
+; AVX-NEXT:    vpextrb $6, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $24, %edx
+; AVX-NEXT:    vpextrb $7, %xmm0, %edi
+; AVX-NEXT:    shll $28, %edi
+; AVX-NEXT:    orl %edx, %edi
+; AVX-NEXT:    orl %esi, %edi
+; AVX-NEXT:    orq %rcx, %rdi
+; AVX-NEXT:    vpextrb $9, %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    shlq $36, %rcx
+; AVX-NEXT:    orq %rdi, %rcx
+; AVX-NEXT:    vpextrb $10, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shlq $40, %rdx
+; AVX-NEXT:    orq %rcx, %rdx
+; AVX-NEXT:    vpextrb $11, %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    shlq $44, %rcx
+; AVX-NEXT:    orq %rdx, %rcx
+; AVX-NEXT:    vpextrb $12, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shlq $48, %rdx
+; AVX-NEXT:    vpextrb $13, %xmm0, %esi
+; AVX-NEXT:    andl $15, %esi
+; AVX-NEXT:    shlq $52, %rsi
+; AVX-NEXT:    orq %rdx, %rsi
+; AVX-NEXT:    vpextrb $14, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shlq $56, %rdx
+; AVX-NEXT:    orq %rsi, %rdx
+; AVX-NEXT:    vpextrb $15, %xmm0, %esi
+; AVX-NEXT:    shlq $60, %rsi
+; AVX-NEXT:    orq %rdx, %rsi
+; AVX-NEXT:    orq %rcx, %rsi
+; AVX-NEXT:    movq %rsi, (%rax)
+; AVX-NEXT:    movq $0, 8(%rax)
 ; AVX-NEXT:    retq
   %res = shufflevector <8 x i4> %arg, <8 x i4> poison,
   <64 x i32> <i32 0     , i32 poison, i32 1     , i32 poison, i32 2     , i32 poison, i32 3     , i32 poison,
@@ -441,7 +133,22 @@ define <64 x i4> @avir_v16i4_to_v64i4(<16 x i4> %arg) {
 ; AVX-LABEL: avir_v16i4_to_v64i4:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movq %rdi, %rax
-; AVX-NEXT:    vmovaps %xmm0, (%rdi)
+; AVX-NEXT:    vmovd %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    vpextrb $1, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $8, %edx
+; AVX-NEXT:    orl %ecx, %edx
+; AVX-NEXT:    vpextrb $2, %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    shll $16, %ecx
+; AVX-NEXT:    orl %edx, %ecx
+; AVX-NEXT:    vpextrb $3, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $24, %edx
+; AVX-NEXT:    orl %ecx, %edx
+; AVX-NEXT:    movq %rdx, (%rdi)
+; AVX-NEXT:    movq $0, 8(%rdi)
 ; AVX-NEXT:    retq
   %res = shufflevector <16 x i4> %arg, <16 x i4> poison,
   <64 x i32> <i32 0     , i32 poison, i32 1     , i32 poison, i32 2     , i32 poison, i32 3     , i32 poison,
@@ -456,205 +163,22 @@ define <64 x i4> @avir_v16i4_to_v64i4(<16 x i4> %arg) {
 }
 
 define <128 x i4> @avir_v4i4_to_v128i4(<4 x i4> %arg) {
-; AVX1-LABEL: avir_v4i4_to_v128i4:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    movq %rdi, %rax
-; AVX1-NEXT:    vpextrb $8, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    movq %rdx, %rcx
-; AVX1-NEXT:    shlq $32, %rcx
-; AVX1-NEXT:    vmovd %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    vpextrb $1, %xmm0, %edi
-; AVX1-NEXT:    andl $15, %edi
-; AVX1-NEXT:    shll $4, %edi
-; AVX1-NEXT:    orl %esi, %edi
-; AVX1-NEXT:    vpextrb $2, %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shll $8, %esi
-; AVX1-NEXT:    orl %edi, %esi
-; AVX1-NEXT:    vpextrb $3, %xmm0, %edi
-; AVX1-NEXT:    andl $15, %edi
-; AVX1-NEXT:    shll $12, %edi
-; AVX1-NEXT:    orl %esi, %edi
-; AVX1-NEXT:    shll $16, %edx
-; AVX1-NEXT:    orl %edi, %edx
-; AVX1-NEXT:    vpextrb $9, %xmm0, %edi
-; AVX1-NEXT:    andl $15, %edi
-; AVX1-NEXT:    movl %edi, %r8d
-; AVX1-NEXT:    shll $20, %r8d
-; AVX1-NEXT:    orl %edx, %r8d
-; AVX1-NEXT:    vpextrb $12, %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    movl %esi, %r9d
-; AVX1-NEXT:    shll $24, %r9d
-; AVX1-NEXT:    vpextrb $13, %xmm0, %edx
-; AVX1-NEXT:    movl %edx, %r10d
-; AVX1-NEXT:    shll $28, %r10d
-; AVX1-NEXT:    orl %r9d, %r10d
-; AVX1-NEXT:    orl %r8d, %r10d
-; AVX1-NEXT:    orq %rcx, %r10
-; AVX1-NEXT:    shlq $36, %rdi
-; AVX1-NEXT:    orq %r10, %rdi
-; AVX1-NEXT:    movq %rsi, %rcx
-; AVX1-NEXT:    shlq $40, %rcx
-; AVX1-NEXT:    orq %rdi, %rcx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    movq %rdx, %rdi
-; AVX1-NEXT:    shlq $44, %rdi
-; AVX1-NEXT:    orq %rcx, %rdi
-; AVX1-NEXT:    shlq $48, %rsi
-; AVX1-NEXT:    shlq $52, %rdx
-; AVX1-NEXT:    orq %rsi, %rdx
-; AVX1-NEXT:    vpextrb $14, %xmm0, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $56, %rcx
-; AVX1-NEXT:    orq %rdx, %rcx
-; AVX1-NEXT:    vpextrb $15, %xmm0, %edx
-; AVX1-NEXT:    shlq $60, %rdx
-; AVX1-NEXT:    orq %rcx, %rdx
-; AVX1-NEXT:    orq %rdi, %rdx
-; AVX1-NEXT:    movq %rdx, (%rax)
-; AVX1-NEXT:    movq $0, 8(%rax)
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: avir_v4i4_to_v128i4:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    movq %rdi, %rax
-; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,u,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    shlq $32, %rcx
-; AVX2-NEXT:    vmovd %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpextrb $1, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $4, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $2, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $8, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $3, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $12, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $4, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $16, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $5, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $20, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $6, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $24, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $7, %xmm1, %esi
-; AVX2-NEXT:    shll $28, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    orq %rcx, %rsi
-; AVX2-NEXT:    vpextrb $9, %xmm1, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $36, %rcx
-; AVX2-NEXT:    orq %rsi, %rcx
-; AVX2-NEXT:    vpextrb $10, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $40, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $11, %xmm1, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $44, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    vpextrb $12, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $48, %rdx
-; AVX2-NEXT:    vpextrb $13, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shlq $52, %rsi
-; AVX2-NEXT:    orq %rdx, %rsi
-; AVX2-NEXT:    vpextrb $14, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $56, %rdx
-; AVX2-NEXT:    orq %rsi, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $15, %xmm1, %ecx
-; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    movq %rcx, 8(%rdi)
-; AVX2-NEXT:    vpextrb $8, %xmm0, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    shlq $32, %rcx
-; AVX2-NEXT:    vmovd %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpextrb $1, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $4, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $2, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $8, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $3, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $12, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $4, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $16, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $5, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $20, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $6, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $24, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $7, %xmm0, %esi
-; AVX2-NEXT:    shll $28, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    orq %rcx, %rsi
-; AVX2-NEXT:    vpextrb $9, %xmm0, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $36, %rcx
-; AVX2-NEXT:    orq %rsi, %rcx
-; AVX2-NEXT:    vpextrb $10, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $40, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $11, %xmm0, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $44, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    vpextrb $12, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $48, %rdx
-; AVX2-NEXT:    vpextrb $13, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shlq $52, %rsi
-; AVX2-NEXT:    orq %rdx, %rsi
-; AVX2-NEXT:    vpextrb $14, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $56, %rdx
-; AVX2-NEXT:    orq %rsi, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $15, %xmm0, %ecx
-; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    movq %rcx, (%rdi)
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: avir_v4i4_to_v128i4:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    movq %rdi, %rax
-; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512-NEXT:    vmovdqa %xmm0, (%rdi)
-; AVX512-NEXT:    retq
+; AVX-LABEL: avir_v4i4_to_v128i4:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq %rdi, %rax
+; AVX-NEXT:    vmovd %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    vpextrb $8, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $16, %edx
+; AVX-NEXT:    orl %ecx, %edx
+; AVX-NEXT:    vpextrb $12, %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    shll $24, %ecx
+; AVX-NEXT:    orl %edx, %ecx
+; AVX-NEXT:    movq %rcx, (%rdi)
+; AVX-NEXT:    movq $0, 8(%rdi)
+; AVX-NEXT:    retq
   %res = shufflevector <4 x i4> %arg, <4 x i4> poison,
   <128 x i32> <i32 0     , i32 poison, i32 poison, i32 poison, i32 2     , i32 poison, i32 3     , i32 poison,
               i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
@@ -676,210 +200,72 @@ define <128 x i4> @avir_v4i4_to_v128i4(<4 x i4> %arg) {
 }
 
 define <128 x i4> @avir_v8i4_to_v128i4(<8 x i4> %arg) {
-; AVX1-LABEL: avir_v8i4_to_v128i4:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    movq %rdi, %rax
-; AVX1-NEXT:    vpextrb $8, %xmm0, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $32, %rcx
-; AVX1-NEXT:    vmovd %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpextrb $1, %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shll $4, %esi
-; AVX1-NEXT:    orl %edx, %esi
-; AVX1-NEXT:    vpextrb $2, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shll $8, %edx
-; AVX1-NEXT:    orl %esi, %edx
-; AVX1-NEXT:    vpextrb $3, %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shll $12, %esi
-; AVX1-NEXT:    orl %edx, %esi
-; AVX1-NEXT:    vpextrb $4, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shll $16, %edx
-; AVX1-NEXT:    orl %esi, %edx
-; AVX1-NEXT:    vpextrb $5, %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shll $20, %esi
-; AVX1-NEXT:    orl %edx, %esi
-; AVX1-NEXT:    vpextrb $6, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shll $24, %edx
-; AVX1-NEXT:    vpextrb $7, %xmm0, %edi
-; AVX1-NEXT:    shll $28, %edi
-; AVX1-NEXT:    orl %edx, %edi
-; AVX1-NEXT:    orl %esi, %edi
-; AVX1-NEXT:    orq %rcx, %rdi
-; AVX1-NEXT:    vpextrb $9, %xmm0, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $36, %rcx
-; AVX1-NEXT:    orq %rdi, %rcx
-; AVX1-NEXT:    vpextrb $10, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shlq $40, %rdx
-; AVX1-NEXT:    orq %rcx, %rdx
-; AVX1-NEXT:    vpextrb $11, %xmm0, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $44, %rcx
-; AVX1-NEXT:    orq %rdx, %rcx
-; AVX1-NEXT:    vpextrb $12, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shlq $48, %rdx
-; AVX1-NEXT:    vpextrb $13, %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shlq $52, %rsi
-; AVX1-NEXT:    orq %rdx, %rsi
-; AVX1-NEXT:    vpextrb $14, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shlq $56, %rdx
-; AVX1-NEXT:    orq %rsi, %rdx
-; AVX1-NEXT:    vpextrb $15, %xmm0, %esi
-; AVX1-NEXT:    shlq $60, %rsi
-; AVX1-NEXT:    orq %rdx, %rsi
-; AVX1-NEXT:    orq %rcx, %rsi
-; AVX1-NEXT:    movq %rsi, (%rax)
-; AVX1-NEXT:    movq $0, 8(%rax)
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: avir_v8i4_to_v128i4:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    movq %rdi, %rax
-; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,u,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    shlq $32, %rcx
-; AVX2-NEXT:    vmovd %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpextrb $1, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $4, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $2, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $8, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $3, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $12, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $4, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $16, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $5, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $20, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $6, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $24, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $7, %xmm1, %esi
-; AVX2-NEXT:    shll $28, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    orq %rcx, %rsi
-; AVX2-NEXT:    vpextrb $9, %xmm1, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $36, %rcx
-; AVX2-NEXT:    orq %rsi, %rcx
-; AVX2-NEXT:    vpextrb $10, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $40, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $11, %xmm1, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $44, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    vpextrb $12, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $48, %rdx
-; AVX2-NEXT:    vpextrb $13, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shlq $52, %rsi
-; AVX2-NEXT:    orq %rdx, %rsi
-; AVX2-NEXT:    vpextrb $14, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $56, %rdx
-; AVX2-NEXT:    orq %rsi, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $15, %xmm1, %ecx
-; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    movq %rcx, 8(%rdi)
-; AVX2-NEXT:    vpextrb $8, %xmm0, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    shlq $32, %rcx
-; AVX2-NEXT:    vmovd %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpextrb $1, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $4, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $2, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $8, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $3, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $12, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $4, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $16, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $5, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $20, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $6, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $24, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $7, %xmm0, %esi
-; AVX2-NEXT:    shll $28, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    orq %rcx, %rsi
-; AVX2-NEXT:    vpextrb $9, %xmm0, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $36, %rcx
-; AVX2-NEXT:    orq %rsi, %rcx
-; AVX2-NEXT:    vpextrb $10, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $40, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $11, %xmm0, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $44, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    vpextrb $12, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $48, %rdx
-; AVX2-NEXT:    vpextrb $13, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shlq $52, %rsi
-; AVX2-NEXT:    orq %rdx, %rsi
-; AVX2-NEXT:    vpextrb $14, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $56, %rdx
-; AVX2-NEXT:    orq %rsi, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $15, %xmm0, %ecx
-; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    movq %rcx, (%rdi)
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: avir_v8i4_to_v128i4:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    movq %rdi, %rax
-; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512-NEXT:    vmovdqa %xmm0, (%rdi)
-; AVX512-NEXT:    retq
+; AVX-LABEL: avir_v8i4_to_v128i4:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq %rdi, %rax
+; AVX-NEXT:    vpextrb $8, %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    shlq $32, %rcx
+; AVX-NEXT:    vmovd %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    vpextrb $1, %xmm0, %esi
+; AVX-NEXT:    andl $15, %esi
+; AVX-NEXT:    shll $4, %esi
+; AVX-NEXT:    orl %edx, %esi
+; AVX-NEXT:    vpextrb $2, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $8, %edx
+; AVX-NEXT:    orl %esi, %edx
+; AVX-NEXT:    vpextrb $3, %xmm0, %esi
+; AVX-NEXT:    andl $15, %esi
+; AVX-NEXT:    shll $12, %esi
+; AVX-NEXT:    orl %edx, %esi
+; AVX-NEXT:    vpextrb $4, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $16, %edx
+; AVX-NEXT:    orl %esi, %edx
+; AVX-NEXT:    vpextrb $5, %xmm0, %esi
+; AVX-NEXT:    andl $15, %esi
+; AVX-NEXT:    shll $20, %esi
+; AVX-NEXT:    orl %edx, %esi
+; AVX-NEXT:    vpextrb $6, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $24, %edx
+; AVX-NEXT:    vpextrb $7, %xmm0, %edi
+; AVX-NEXT:    shll $28, %edi
+; AVX-NEXT:    orl %edx, %edi
+; AVX-NEXT:    orl %esi, %edi
+; AVX-NEXT:    orq %rcx, %rdi
+; AVX-NEXT:    vpextrb $9, %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    shlq $36, %rcx
+; AVX-NEXT:    orq %rdi, %rcx
+; AVX-NEXT:    vpextrb $10, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shlq $40, %rdx
+; AVX-NEXT:    orq %rcx, %rdx
+; AVX-NEXT:    vpextrb $11, %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    shlq $44, %rcx
+; AVX-NEXT:    orq %rdx, %rcx
+; AVX-NEXT:    vpextrb $12, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shlq $48, %rdx
+; AVX-NEXT:    vpextrb $13, %xmm0, %esi
+; AVX-NEXT:    andl $15, %esi
+; AVX-NEXT:    shlq $52, %rsi
+; AVX-NEXT:    orq %rdx, %rsi
+; AVX-NEXT:    vpextrb $14, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shlq $56, %rdx
+; AVX-NEXT:    orq %rsi, %rdx
+; AVX-NEXT:    vpextrb $15, %xmm0, %esi
+; AVX-NEXT:    shlq $60, %rsi
+; AVX-NEXT:    orq %rdx, %rsi
+; AVX-NEXT:    orq %rcx, %rsi
+; AVX-NEXT:    movq %rsi, (%rax)
+; AVX-NEXT:    movq $0, 8(%rax)
+; AVX-NEXT:    retq
   %res = shufflevector <8 x i4> %arg, <8 x i4> poison,
   <128 x i32> <i32 0     , i32 poison, i32 poison, i32 poison, i32 2     , i32 poison, i32 3     , i32 poison,
               i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
@@ -901,208 +287,22 @@ define <128 x i4> @avir_v8i4_to_v128i4(<8 x i4> %arg) {
 }
 
 define <128 x i4> @avir_v16i4_to_v128i4(<16 x i4> %arg) {
-; AVX1-LABEL: avir_v16i4_to_v128i4:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    movq %rdi, %rax
-; AVX1-NEXT:    vpextrb $12, %xmm0, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $32, %rcx
-; AVX1-NEXT:    vpextrd $2, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpextrb $9, %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shll $8, %esi
-; AVX1-NEXT:    orl %edx, %esi
-; AVX1-NEXT:    vpextrb $10, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shll $16, %edx
-; AVX1-NEXT:    orl %esi, %edx
-; AVX1-NEXT:    vpextrb $11, %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shll $24, %esi
-; AVX1-NEXT:    orl %edx, %esi
-; AVX1-NEXT:    orq %rcx, %rsi
-; AVX1-NEXT:    vpextrb $13, %xmm0, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $40, %rcx
-; AVX1-NEXT:    orq %rsi, %rcx
-; AVX1-NEXT:    vpextrb $14, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shlq $48, %rdx
-; AVX1-NEXT:    orq %rcx, %rdx
-; AVX1-NEXT:    vpextrb $15, %xmm0, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $56, %rcx
-; AVX1-NEXT:    orq %rdx, %rcx
-; AVX1-NEXT:    movq %rcx, 8(%rdi)
-; AVX1-NEXT:    vpextrb $4, %xmm0, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $32, %rcx
-; AVX1-NEXT:    vmovd %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    vpextrb $1, %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shll $8, %esi
-; AVX1-NEXT:    orl %edx, %esi
-; AVX1-NEXT:    vpextrb $2, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shll $16, %edx
-; AVX1-NEXT:    orl %esi, %edx
-; AVX1-NEXT:    vpextrb $3, %xmm0, %esi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shll $24, %esi
-; AVX1-NEXT:    orl %edx, %esi
-; AVX1-NEXT:    orq %rcx, %rsi
-; AVX1-NEXT:    vpextrb $5, %xmm0, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $40, %rcx
-; AVX1-NEXT:    orq %rsi, %rcx
-; AVX1-NEXT:    vpextrb $6, %xmm0, %edx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shlq $48, %rdx
-; AVX1-NEXT:    orq %rcx, %rdx
-; AVX1-NEXT:    vpextrb $7, %xmm0, %ecx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $56, %rcx
-; AVX1-NEXT:    orq %rdx, %rcx
-; AVX1-NEXT:    movq %rcx, (%rdi)
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: avir_v16i4_to_v128i4:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    movq %rdi, %rax
-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    shlq $32, %rcx
-; AVX2-NEXT:    vmovd %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpextrb $1, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $4, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $2, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $8, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $3, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $12, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $4, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $16, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $5, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $20, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $6, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $24, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $7, %xmm1, %esi
-; AVX2-NEXT:    shll $28, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    orq %rcx, %rsi
-; AVX2-NEXT:    vpextrb $9, %xmm1, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $36, %rcx
-; AVX2-NEXT:    orq %rsi, %rcx
-; AVX2-NEXT:    vpextrb $10, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $40, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $11, %xmm1, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $44, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    vpextrb $12, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $48, %rdx
-; AVX2-NEXT:    vpextrb $13, %xmm1, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shlq $52, %rsi
-; AVX2-NEXT:    orq %rdx, %rsi
-; AVX2-NEXT:    vpextrb $14, %xmm1, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $56, %rdx
-; AVX2-NEXT:    orq %rsi, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $15, %xmm1, %ecx
-; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    movq %rcx, 8(%rdi)
-; AVX2-NEXT:    vpextrb $8, %xmm0, %ecx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    shlq $32, %rcx
-; AVX2-NEXT:    vmovd %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    vpextrb $1, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $4, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $2, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $8, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $3, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $12, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $4, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $16, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $5, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shll $20, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    vpextrb $6, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shll $24, %edx
-; AVX2-NEXT:    orl %esi, %edx
-; AVX2-NEXT:    vpextrb $7, %xmm0, %esi
-; AVX2-NEXT:    shll $28, %esi
-; AVX2-NEXT:    orl %edx, %esi
-; AVX2-NEXT:    orq %rcx, %rsi
-; AVX2-NEXT:    vpextrb $9, %xmm0, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $36, %rcx
-; AVX2-NEXT:    orq %rsi, %rcx
-; AVX2-NEXT:    vpextrb $10, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $40, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $11, %xmm0, %ecx
-; AVX2-NEXT:    movzwl %cx, %ecx
-; AVX2-NEXT:    shlq $44, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    vpextrb $12, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $48, %rdx
-; AVX2-NEXT:    vpextrb $13, %xmm0, %esi
-; AVX2-NEXT:    movzwl %si, %esi
-; AVX2-NEXT:    shlq $52, %rsi
-; AVX2-NEXT:    orq %rdx, %rsi
-; AVX2-NEXT:    vpextrb $14, %xmm0, %edx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $56, %rdx
-; AVX2-NEXT:    orq %rsi, %rdx
-; AVX2-NEXT:    orq %rcx, %rdx
-; AVX2-NEXT:    vpextrb $15, %xmm0, %ecx
-; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    orq %rdx, %rcx
-; AVX2-NEXT:    movq %rcx, (%rdi)
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: avir_v16i4_to_v128i4:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    movq %rdi, %rax
-; AVX512-NEXT:    vmovaps %xmm0, (%rdi)
-; AVX512-NEXT:    retq
+; AVX-LABEL: avir_v16i4_to_v128i4:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq %rdi, %rax
+; AVX-NEXT:    vmovd %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    vpextrb $2, %xmm0, %edx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    shll $16, %edx
+; AVX-NEXT:    orl %ecx, %edx
+; AVX-NEXT:    vpextrb $3, %xmm0, %ecx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    shll $24, %ecx
+; AVX-NEXT:    orl %edx, %ecx
+; AVX-NEXT:    movq %rcx, (%rdi)
+; AVX-NEXT:    movq $0, 8(%rdi)
+; AVX-NEXT:    retq
   %res = shufflevector <16 x i4> %arg, <16 x i4> poison,
   <128 x i32> <i32 0     , i32 poison, i32 poison, i32 poison, i32 2     , i32 poison, i32 3     , i32 poison,
               i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
@@ -1122,3 +322,7 @@ define <128 x i4> @avir_v16i4_to_v128i4(<16 x i4> %arg) {
               i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
   ret <128 x i4> %res
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX1: {{.*}}
+; AVX2: {{.*}}
+; AVX512: {{.*}}

```
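For context on the hook named in the subject: a minimal sketch of what a `PromoteIntOp_ANY_EXTEND_VECTOR_INREG` operand-promotion handler typically looks like in `DAGTypeLegalizer` is below. This is only an illustration of the usual promote-the-source-and-rebuild pattern (mirroring existing handlers such as `PromoteIntOp_ANY_EXTEND`), not necessarily the exact body in this PR; the handler would also need a matching case in the `DAGTypeLegalizer::PromoteIntegerOperand` switch.

```cpp
// Sketch only, assuming the standard operand-promotion pattern: promote the
// narrow-element source vector to its wider legal element type, then rebuild
// ANY_EXTEND_VECTOR_INREG with the promoted operand.  Since an any-extend
// places no requirement on the new high bits, the promoted value can be used
// directly without masking or sign-fixing.
SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND_VECTOR_INREG(SDNode *N) {
  SDLoc dl(N);
  SDValue Op = GetPromotedInteger(N->getOperand(0));
  return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, dl, N->getValueType(0), Op);
}
```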

https://github.com/llvm/llvm-project/pull/178144

