[llvm] 6bdd63d - [X86] createVariablePermute - handle case where recursive createVariablePermute call fails
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 27 05:53:06 PST 2020
Author: Simon Pilgrim
Date: 2020-02-27T13:52:31Z
New Revision: 6bdd63dc28208a597542b0c6bc41093f32417804
URL: https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804
DIFF: https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804.diff
LOG: [X86] createVariablePermute - handle case where recursive createVariablePermute call fails
Account for the case where a recursive createVariablePermute call with a wider vector type fails.
Original test case from @craig.topper (Craig Topper)
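For context, the failure mode is that createVariablePermute can return an empty SDValue when it cannot lower the permute for the widened type, and previously that empty node was passed straight into extractSubVector. The fix is the usual SDValue guard; a minimal sketch using the same names as the patch (surrounding lowering context omitted):

  SDValue NewSrcVec =
      createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
  if (!NewSrcVec)
    return SDValue(); // propagate the failure instead of extracting from an empty node
  return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);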
Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/var-permute-512.ll

Removed: 
    
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1f132bc768cc..1cc9c7b16d78 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9631,9 +9631,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
Subtarget, DAG, SDLoc(IndicesVec));
- return extractSubVector(
- createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0,
- DAG, DL, SizeInBits);
+ SDValue NewSrcVec =
+ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
+ if (NewSrcVec)
+ return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
+ return SDValue();
} else if (SrcVec.getValueSizeInBits() < SizeInBits) {
// Widen smaller SrcVec to match VT.
SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
diff --git a/llvm/test/CodeGen/X86/var-permute-512.ll b/llvm/test/CodeGen/X86/var-permute-512.ll
index ad1eddc15ac9..d2c7b94cb332 100644
--- a/llvm/test/CodeGen/X86/var-permute-512.ll
+++ b/llvm/test/CodeGen/X86/var-permute-512.ll
@@ -1232,3 +1232,904 @@ define <16 x float> @var_shuffle_v16f32(<16 x float> %v, <16 x i32> %indices) no
%ret15 = insertelement <16 x float> %ret14, float %v15, i32 15
ret <16 x float> %ret15
}
+
+define void @var_cvt_shuffle_v64f32_v64i8_idx(<64 x float>* %dst, <64 x i8> %src, i32 %b) nounwind {
+; AVX512F-LABEL: var_cvt_shuffle_v64f32_v64i8_idx:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: pushq %rbp
+; AVX512F-NEXT: movq %rsp, %rbp
+; AVX512F-NEXT: andq $-64, %rsp
+; AVX512F-NEXT: subq $4160, %rsp # imm = 0x1040
+; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX512F-NEXT: vpbroadcastd %esi, %zmm4
+; AVX512F-NEXT: vpaddd {{.*}}(%rip), %zmm4, %zmm1
+; AVX512F-NEXT: vpaddd {{.*}}(%rip), %zmm4, %zmm2
+; AVX512F-NEXT: vpaddd {{.*}}(%rip), %zmm4, %zmm3
+; AVX512F-NEXT: vpaddd {{.*}}(%rip), %zmm4, %zmm4
+; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovaps %ymm0, (%rsp)
+; AVX512F-NEXT: movzbl 3968(%rsp,%rax), %eax
+; AVX512F-NEXT: vmovd %eax, %xmm0
+; AVX512F-NEXT: vpextrd $1, %xmm4, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $1, 3904(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $2, %xmm4, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $2, 3840(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $3, %xmm4, %eax
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm5
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $3, 3776(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $4, 3712(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $1, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $5, 3648(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $2, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $6, 3584(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $3, %xmm5, %eax
+; AVX512F-NEXT: vextracti32x4 $2, %zmm4, %xmm5
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $7, 3520(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $8, 3456(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $1, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $9, 3392(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $2, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $10, 3328(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $3, %xmm5, %eax
+; AVX512F-NEXT: vextracti32x4 $3, %zmm4, %xmm4
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $11, 3264(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $12, 3200(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $1, %xmm4, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $13, 3136(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $2, %xmm4, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $14, 3072(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrd $3, %xmm4, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $15, 3008(%rsp,%rax), %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm3, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: movzbl 2944(%rsp,%rax), %eax
+; AVX512F-NEXT: vmovd %eax, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm3, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $1, 2880(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm3, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $2, 2816(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm3, %eax
+; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm5
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $3, 2752(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $4, 2688(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $5, 2624(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $6, 2560(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm5, %eax
+; AVX512F-NEXT: vextracti32x4 $2, %zmm3, %xmm5
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $7, 2496(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $8, 2432(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $9, 2368(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $10, 2304(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm5, %eax
+; AVX512F-NEXT: vextracti32x4 $3, %zmm3, %xmm3
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $11, 2240(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vmovd %xmm3, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $12, 2176(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm3, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $13, 2112(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm3, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $14, 2048(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm3, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $15, 1984(%rsp,%rax), %xmm4, %xmm3
+; AVX512F-NEXT: vmovd %xmm2, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: movzbl 1920(%rsp,%rax), %eax
+; AVX512F-NEXT: vmovd %eax, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm2, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $1, 1856(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm2, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $2, 1792(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm2, %eax
+; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm5
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $3, 1728(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $4, 1664(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $5, 1600(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $6, 1536(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm5, %eax
+; AVX512F-NEXT: vextracti32x4 $2, %zmm2, %xmm5
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $7, 1472(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $8, 1408(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $9, 1344(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $10, 1280(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm5, %eax
+; AVX512F-NEXT: vextracti32x4 $3, %zmm2, %xmm2
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $11, 1216(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vmovd %xmm2, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $12, 1152(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm2, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $13, 1088(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm2, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $14, 1024(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm2, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $15, 960(%rsp,%rax), %xmm4, %xmm2
+; AVX512F-NEXT: vmovd %xmm1, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: movzbl 896(%rsp,%rax), %eax
+; AVX512F-NEXT: vmovd %eax, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm1, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $1, 832(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm1, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $2, 768(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm1, %eax
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm5
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $3, 704(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $4, 640(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $5, 576(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $6, 512(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm5, %eax
+; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $7, 448(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $8, 384(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm5, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $9, 320(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: andl $63, %esi
+; AVX512F-NEXT: vpinsrb $10, 4032(%rsp,%rsi), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm5, %eax
+; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm1
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $11, 256(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vmovd %xmm1, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $12, 192(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm1, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $13, 128(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $2, %xmm1, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $14, 64(%rsp,%rax), %xmm4, %xmm4
+; AVX512F-NEXT: vpextrd $3, %xmm1, %eax
+; AVX512F-NEXT: andl $63, %eax
+; AVX512F-NEXT: vpinsrb $15, (%rsp,%rax), %xmm4, %xmm1
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vcvtdq2ps %zmm0, %zmm0
+; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
+; AVX512F-NEXT: vcvtdq2ps %zmm3, %zmm3
+; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
+; AVX512F-NEXT: vcvtdq2ps %zmm2, %zmm2
+; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT: vcvtdq2ps %zmm1, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, 192(%rdi)
+; AVX512F-NEXT: vmovaps %zmm2, 128(%rdi)
+; AVX512F-NEXT: vmovaps %zmm3, 64(%rdi)
+; AVX512F-NEXT: vmovaps %zmm0, (%rdi)
+; AVX512F-NEXT: movq %rbp, %rsp
+; AVX512F-NEXT: popq %rbp
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: var_cvt_shuffle_v64f32_v64i8_idx:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: pushq %rbp
+; AVX512BW-NEXT: movq %rsp, %rbp
+; AVX512BW-NEXT: andq $-64, %rsp
+; AVX512BW-NEXT: subq $128, %rsp
+; AVX512BW-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX512BW-NEXT: vpbroadcastd %esi, %zmm2
+; AVX512BW-NEXT: vpaddd {{.*}}(%rip), %zmm2, %zmm1
+; AVX512BW-NEXT: vmovd %xmm1, %eax
+; AVX512BW-NEXT: vmovaps %zmm0, (%rsp)
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm0
+; AVX512BW-NEXT: vpextrd $1, %xmm1, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrd $2, %xmm1, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrd $3, %xmm1, %eax
+; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm3
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm3, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrd $1, %xmm3, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrd $2, %xmm3, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrd $3, %xmm3, %eax
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm4, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrd $1, %xmm4, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vpaddd {{.*}}(%rip), %zmm2, %zmm5
+; AVX512BW-NEXT: vpaddd {{.*}}(%rip), %zmm2, %zmm3
+; AVX512BW-NEXT: vpaddd {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: andl $63, %esi
+; AVX512BW-NEXT: vpinsrb $10, (%rsp,%rsi), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrd $3, %xmm4, %eax
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm1
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm1, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrd $1, %xmm1, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrd $2, %xmm1, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrd $3, %xmm1, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm8
+; AVX512BW-NEXT: vmovd %xmm5, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm1
+; AVX512BW-NEXT: vpextrd $1, %xmm5, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrd $2, %xmm5, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrd $3, %xmm5, %eax
+; AVX512BW-NEXT: vextracti128 $1, %ymm5, %xmm4
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm1, %xmm1
+; AVX512BW-NEXT: vmovd %xmm4, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrd $1, %xmm4, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrd $2, %xmm4, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrd $3, %xmm4, %eax
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm5, %xmm4
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm1, %xmm1
+; AVX512BW-NEXT: vmovd %xmm4, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrd $1, %xmm4, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrd $2, %xmm4, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $10, (%rsp,%rax), %xmm1, %xmm6
+; AVX512BW-NEXT: vpextrd $3, %xmm4, %eax
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm5, %xmm1
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $11, (%rsp,%rax), %xmm6, %xmm4
+; AVX512BW-NEXT: vmovd %xmm1, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $12, (%rsp,%rax), %xmm4, %xmm4
+; AVX512BW-NEXT: vmovd %xmm3, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm5
+; AVX512BW-NEXT: vpextrd $1, %xmm3, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm5, %xmm5
+; AVX512BW-NEXT: vpextrd $2, %xmm3, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm5, %xmm5
+; AVX512BW-NEXT: vpextrd $3, %xmm3, %eax
+; AVX512BW-NEXT: vextracti128 $1, %ymm3, %xmm6
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm5, %xmm5
+; AVX512BW-NEXT: vmovd %xmm6, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm5, %xmm5
+; AVX512BW-NEXT: vpextrd $1, %xmm6, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm5, %xmm5
+; AVX512BW-NEXT: vpextrd $2, %xmm6, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm5, %xmm7
+; AVX512BW-NEXT: vpextrd $3, %xmm6, %eax
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm3, %xmm5
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm7, %xmm6
+; AVX512BW-NEXT: vmovd %xmm5, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm6, %xmm6
+; AVX512BW-NEXT: vpextrd $1, %xmm5, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm6, %xmm6
+; AVX512BW-NEXT: vpextrd $2, %xmm5, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $10, (%rsp,%rax), %xmm6, %xmm6
+; AVX512BW-NEXT: vmovd %xmm2, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm7
+; AVX512BW-NEXT: vpextrd $1, %xmm2, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm7, %xmm7
+; AVX512BW-NEXT: vpextrd $2, %xmm2, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm7, %xmm7
+; AVX512BW-NEXT: vpextrd $3, %xmm2, %eax
+; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm0
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm7, %xmm7
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm7, %xmm7
+; AVX512BW-NEXT: vpextrd $1, %xmm0, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm7, %xmm7
+; AVX512BW-NEXT: vpextrd $2, %xmm0, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm7, %xmm7
+; AVX512BW-NEXT: vpextrd $3, %xmm0, %eax
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm2, %xmm0
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm7, %xmm7
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm7, %xmm7
+; AVX512BW-NEXT: vpextrd $1, %xmm0, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm7, %xmm7
+; AVX512BW-NEXT: vpextrd $2, %xmm0, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm7, %xmm7
+; AVX512BW-NEXT: vpextrd $3, %xmm0, %eax
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm2, %xmm0
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm7, %xmm2
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX512BW-NEXT: vpextrd $1, %xmm0, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX512BW-NEXT: vpextrd $2, %xmm0, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX512BW-NEXT: vpextrd $3, %xmm0, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
+; AVX512BW-NEXT: vpextrd $3, %xmm5, %eax
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm3, %xmm2
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm6, %xmm3
+; AVX512BW-NEXT: vmovd %xmm2, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; AVX512BW-NEXT: vpextrd $1, %xmm2, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; AVX512BW-NEXT: vpextrd $2, %xmm2, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; AVX512BW-NEXT: vpextrd $3, %xmm2, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
+; AVX512BW-NEXT: vpextrd $1, %xmm1, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3
+; AVX512BW-NEXT: vpextrd $2, %xmm1, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; AVX512BW-NEXT: vpextrd $3, %xmm1, %eax
+; AVX512BW-NEXT: andl $63, %eax
+; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm1
+; AVX512BW-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512BW-NEXT: vcvtdq2ps %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovsxbd %xmm2, %zmm2
+; AVX512BW-NEXT: vcvtdq2ps %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512BW-NEXT: vcvtdq2ps %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovsxbd %xmm8, %zmm3
+; AVX512BW-NEXT: vcvtdq2ps %zmm3, %zmm3
+; AVX512BW-NEXT: vmovaps %zmm3, 192(%rdi)
+; AVX512BW-NEXT: vmovaps %zmm1, 128(%rdi)
+; AVX512BW-NEXT: vmovaps %zmm2, 64(%rdi)
+; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
+; AVX512BW-NEXT: movq %rbp, %rsp
+; AVX512BW-NEXT: popq %rbp
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VBMI-LABEL: var_cvt_shuffle_v64f32_v64i8_idx:
+; AVX512VBMI: # %bb.0:
+; AVX512VBMI-NEXT: pushq %rbp
+; AVX512VBMI-NEXT: movq %rsp, %rbp
+; AVX512VBMI-NEXT: andq $-64, %rsp
+; AVX512VBMI-NEXT: subq $128, %rsp
+; AVX512VBMI-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX512VBMI-NEXT: vpbroadcastd %esi, %zmm1
+; AVX512VBMI-NEXT: vpaddd {{.*}}(%rip), %zmm1, %zmm2
+; AVX512VBMI-NEXT: vmovd %xmm2, %eax
+; AVX512VBMI-NEXT: vmovdqa64 %zmm0, (%rsp)
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: movzbl (%rsp,%rax), %eax
+; AVX512VBMI-NEXT: vmovd %eax, %xmm3
+; AVX512VBMI-NEXT: vpextrd $1, %xmm2, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $1, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vpextrd $2, %xmm2, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $2, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vpextrd $3, %xmm2, %eax
+; AVX512VBMI-NEXT: vextracti128 $1, %ymm2, %xmm4
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $3, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vmovd %xmm4, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $4, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vpextrd $1, %xmm4, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $5, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vpextrd $2, %xmm4, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $6, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vpextrd $3, %xmm4, %eax
+; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm2, %xmm4
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $7, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vmovd %xmm4, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $8, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vpextrd $1, %xmm4, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $9, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: andl $63, %esi
+; AVX512VBMI-NEXT: vpinsrb $10, (%rsp,%rsi), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vpextrd $3, %xmm4, %eax
+; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm2, %xmm2
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $11, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vmovd %xmm2, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $12, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vpextrd $1, %xmm2, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $13, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vpextrd $2, %xmm2, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $14, (%rsp,%rax), %xmm3, %xmm3
+; AVX512VBMI-NEXT: vpextrd $3, %xmm2, %eax
+; AVX512VBMI-NEXT: andl $63, %eax
+; AVX512VBMI-NEXT: vpinsrb $15, (%rsp,%rax), %xmm3, %xmm2
+; AVX512VBMI-NEXT: vpaddd {{.*}}(%rip), %zmm1, %zmm3
+; AVX512VBMI-NEXT: vpmovdb %zmm3, %xmm3
+; AVX512VBMI-NEXT: vpaddd {{.*}}(%rip), %zmm1, %zmm4
+; AVX512VBMI-NEXT: vpmovdb %zmm4, %xmm4
+; AVX512VBMI-NEXT: vpaddd {{.*}}(%rip), %zmm1, %zmm1
+; AVX512VBMI-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512VBMI-NEXT: vpmovsxbd %xmm2, %zmm2
+; AVX512VBMI-NEXT: vcvtdq2ps %zmm2, %zmm2
+; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm1
+; AVX512VBMI-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512VBMI-NEXT: vcvtdq2ps %zmm1, %zmm1
+; AVX512VBMI-NEXT: vpermb %zmm0, %zmm4, %zmm4
+; AVX512VBMI-NEXT: vpmovsxbd %xmm4, %zmm4
+; AVX512VBMI-NEXT: vcvtdq2ps %zmm4, %zmm4
+; AVX512VBMI-NEXT: vpermb %zmm0, %zmm3, %zmm0
+; AVX512VBMI-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512VBMI-NEXT: vcvtdq2ps %zmm0, %zmm0
+; AVX512VBMI-NEXT: vmovaps %zmm0, 128(%rdi)
+; AVX512VBMI-NEXT: vmovaps %zmm4, 64(%rdi)
+; AVX512VBMI-NEXT: vmovaps %zmm1, (%rdi)
+; AVX512VBMI-NEXT: vmovaps %zmm2, 192(%rdi)
+; AVX512VBMI-NEXT: movq %rbp, %rsp
+; AVX512VBMI-NEXT: popq %rbp
+; AVX512VBMI-NEXT: vzeroupper
+; AVX512VBMI-NEXT: retq
+ %b_broadcast_init = insertelement <64 x i32> undef, i32 %b, i32 0
+ %b_broadcast = shufflevector <64 x i32> %b_broadcast_init, <64 x i32> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %sub_add__b_broadcast_ = add <64 x i32> %b_broadcast, <i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5>
+ %index_0.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 0
+ %index_1.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 1
+ %index_2.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 2
+ %index_3.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 3
+ %index_4.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 4
+ %index_5.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 5
+ %index_6.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 6
+ %index_7.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 7
+ %index_8.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 8
+ %index_9.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 9
+ %index_10.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 10
+ %index_11.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 11
+ %index_12.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 12
+ %index_13.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 13
+ %index_14.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 14
+ %index_15.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 15
+ %index_16.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 16
+ %index_17.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 17
+ %index_18.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 18
+ %index_19.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 19
+ %index_20.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 20
+ %index_21.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 21
+ %index_22.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 22
+ %index_23.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 23
+ %index_24.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 24
+ %index_25.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 25
+ %index_26.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 26
+ %index_27.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 27
+ %index_28.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 28
+ %index_29.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 29
+ %index_30.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 30
+ %index_31.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 31
+ %index_32.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 32
+ %index_33.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 33
+ %index_34.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 34
+ %index_35.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 35
+ %index_36.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 36
+ %index_37.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 37
+ %index_38.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 38
+ %index_39.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 39
+ %index_40.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 40
+ %index_41.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 41
+ %index_42.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 42
+ %index_43.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 43
+ %index_44.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 44
+ %index_45.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 45
+ %index_46.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 46
+ %index_47.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 47
+ %index_48.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 48
+ %index_49.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 49
+ %index_50.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 50
+ %index_51.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 51
+ %index_52.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 52
+ %index_53.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 53
+ %index_54.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 54
+ %index_55.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 55
+ %index_56.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 56
+ %index_57.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 57
+ %index_59.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 59
+ %index_60.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 60
+ %index_61.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 61
+ %index_62.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 62
+ %index_63.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 63
+ %v_0.i.i = extractelement <64 x i8> %src, i32 %index_0.i.i
+ %v_1.i.i = extractelement <64 x i8> %src, i32 %index_1.i.i
+ %v_2.i.i = extractelement <64 x i8> %src, i32 %index_2.i.i
+ %v_3.i.i = extractelement <64 x i8> %src, i32 %index_3.i.i
+ %v_4.i.i = extractelement <64 x i8> %src, i32 %index_4.i.i
+ %v_5.i.i = extractelement <64 x i8> %src, i32 %index_5.i.i
+ %v_6.i.i = extractelement <64 x i8> %src, i32 %index_6.i.i
+ %v_7.i.i = extractelement <64 x i8> %src, i32 %index_7.i.i
+ %v_8.i.i = extractelement <64 x i8> %src, i32 %index_8.i.i
+ %v_9.i.i = extractelement <64 x i8> %src, i32 %index_9.i.i
+ %v_10.i.i = extractelement <64 x i8> %src, i32 %index_10.i.i
+ %v_11.i.i = extractelement <64 x i8> %src, i32 %index_11.i.i
+ %v_12.i.i = extractelement <64 x i8> %src, i32 %index_12.i.i
+ %v_13.i.i = extractelement <64 x i8> %src, i32 %index_13.i.i
+ %v_14.i.i = extractelement <64 x i8> %src, i32 %index_14.i.i
+ %v_15.i.i = extractelement <64 x i8> %src, i32 %index_15.i.i
+ %v_16.i.i = extractelement <64 x i8> %src, i32 %index_16.i.i
+ %v_17.i.i = extractelement <64 x i8> %src, i32 %index_17.i.i
+ %v_18.i.i = extractelement <64 x i8> %src, i32 %index_18.i.i
+ %v_19.i.i = extractelement <64 x i8> %src, i32 %index_19.i.i
+ %v_20.i.i = extractelement <64 x i8> %src, i32 %index_20.i.i
+ %v_21.i.i = extractelement <64 x i8> %src, i32 %index_21.i.i
+ %v_22.i.i = extractelement <64 x i8> %src, i32 %index_22.i.i
+ %v_23.i.i = extractelement <64 x i8> %src, i32 %index_23.i.i
+ %v_24.i.i = extractelement <64 x i8> %src, i32 %index_24.i.i
+ %v_25.i.i = extractelement <64 x i8> %src, i32 %index_25.i.i
+ %v_26.i.i = extractelement <64 x i8> %src, i32 %index_26.i.i
+ %v_27.i.i = extractelement <64 x i8> %src, i32 %index_27.i.i
+ %v_28.i.i = extractelement <64 x i8> %src, i32 %index_28.i.i
+ %v_29.i.i = extractelement <64 x i8> %src, i32 %index_29.i.i
+ %v_30.i.i = extractelement <64 x i8> %src, i32 %index_30.i.i
+ %v_31.i.i = extractelement <64 x i8> %src, i32 %index_31.i.i
+ %v_32.i.i = extractelement <64 x i8> %src, i32 %index_32.i.i
+ %v_33.i.i = extractelement <64 x i8> %src, i32 %index_33.i.i
+ %v_34.i.i = extractelement <64 x i8> %src, i32 %index_34.i.i
+ %v_35.i.i = extractelement <64 x i8> %src, i32 %index_35.i.i
+ %v_36.i.i = extractelement <64 x i8> %src, i32 %index_36.i.i
+ %v_37.i.i = extractelement <64 x i8> %src, i32 %index_37.i.i
+ %v_38.i.i = extractelement <64 x i8> %src, i32 %index_38.i.i
+ %v_39.i.i = extractelement <64 x i8> %src, i32 %index_39.i.i
+ %v_40.i.i = extractelement <64 x i8> %src, i32 %index_40.i.i
+ %v_41.i.i = extractelement <64 x i8> %src, i32 %index_41.i.i
+ %v_42.i.i = extractelement <64 x i8> %src, i32 %index_42.i.i
+ %v_43.i.i = extractelement <64 x i8> %src, i32 %index_43.i.i
+ %v_44.i.i = extractelement <64 x i8> %src, i32 %index_44.i.i
+ %v_45.i.i = extractelement <64 x i8> %src, i32 %index_45.i.i
+ %v_46.i.i = extractelement <64 x i8> %src, i32 %index_46.i.i
+ %v_47.i.i = extractelement <64 x i8> %src, i32 %index_47.i.i
+ %v_48.i.i = extractelement <64 x i8> %src, i32 %index_48.i.i
+ %v_49.i.i = extractelement <64 x i8> %src, i32 %index_49.i.i
+ %v_50.i.i = extractelement <64 x i8> %src, i32 %index_50.i.i
+ %v_51.i.i = extractelement <64 x i8> %src, i32 %index_51.i.i
+ %v_52.i.i = extractelement <64 x i8> %src, i32 %index_52.i.i
+ %v_53.i.i = extractelement <64 x i8> %src, i32 %index_53.i.i
+ %v_54.i.i = extractelement <64 x i8> %src, i32 %index_54.i.i
+ %v_55.i.i = extractelement <64 x i8> %src, i32 %index_55.i.i
+ %v_56.i.i = extractelement <64 x i8> %src, i32 %index_56.i.i
+ %v_57.i.i = extractelement <64 x i8> %src, i32 %index_57.i.i
+ %v_58.i.i = extractelement <64 x i8> %src, i32 %b
+ %v_59.i.i = extractelement <64 x i8> %src, i32 %index_59.i.i
+ %v_60.i.i = extractelement <64 x i8> %src, i32 %index_60.i.i
+ %v_61.i.i = extractelement <64 x i8> %src, i32 %index_61.i.i
+ %v_62.i.i = extractelement <64 x i8> %src, i32 %index_62.i.i
+ %v_63.i.i = extractelement <64 x i8> %src, i32 %index_63.i.i
+ %dst_0.i.i = insertelement <64 x i8> undef, i8 %v_0.i.i, i32 0
+ %dst_1.i.i = insertelement <64 x i8> %dst_0.i.i, i8 %v_1.i.i, i32 1
+ %dst_2.i.i = insertelement <64 x i8> %dst_1.i.i, i8 %v_2.i.i, i32 2
+ %dst_3.i.i = insertelement <64 x i8> %dst_2.i.i, i8 %v_3.i.i, i32 3
+ %dst_4.i.i = insertelement <64 x i8> %dst_3.i.i, i8 %v_4.i.i, i32 4
+ %dst_5.i.i = insertelement <64 x i8> %dst_4.i.i, i8 %v_5.i.i, i32 5
+ %dst_6.i.i = insertelement <64 x i8> %dst_5.i.i, i8 %v_6.i.i, i32 6
+ %dst_7.i.i = insertelement <64 x i8> %dst_6.i.i, i8 %v_7.i.i, i32 7
+ %dst_8.i.i = insertelement <64 x i8> %dst_7.i.i, i8 %v_8.i.i, i32 8
+ %dst_9.i.i = insertelement <64 x i8> %dst_8.i.i, i8 %v_9.i.i, i32 9
+ %dst_10.i.i = insertelement <64 x i8> %dst_9.i.i, i8 %v_10.i.i, i32 10
+ %dst_11.i.i = insertelement <64 x i8> %dst_10.i.i, i8 %v_11.i.i, i32 11
+ %dst_12.i.i = insertelement <64 x i8> %dst_11.i.i, i8 %v_12.i.i, i32 12
+ %dst_13.i.i = insertelement <64 x i8> %dst_12.i.i, i8 %v_13.i.i, i32 13
+ %dst_14.i.i = insertelement <64 x i8> %dst_13.i.i, i8 %v_14.i.i, i32 14
+ %dst_15.i.i = insertelement <64 x i8> %dst_14.i.i, i8 %v_15.i.i, i32 15
+ %dst_16.i.i = insertelement <64 x i8> %dst_15.i.i, i8 %v_16.i.i, i32 16
+ %dst_17.i.i = insertelement <64 x i8> %dst_16.i.i, i8 %v_17.i.i, i32 17
+ %dst_18.i.i = insertelement <64 x i8> %dst_17.i.i, i8 %v_18.i.i, i32 18
+ %dst_19.i.i = insertelement <64 x i8> %dst_18.i.i, i8 %v_19.i.i, i32 19
+ %dst_20.i.i = insertelement <64 x i8> %dst_19.i.i, i8 %v_20.i.i, i32 20
+ %dst_21.i.i = insertelement <64 x i8> %dst_20.i.i, i8 %v_21.i.i, i32 21
+ %dst_22.i.i = insertelement <64 x i8> %dst_21.i.i, i8 %v_22.i.i, i32 22
+ %dst_23.i.i = insertelement <64 x i8> %dst_22.i.i, i8 %v_23.i.i, i32 23
+ %dst_24.i.i = insertelement <64 x i8> %dst_23.i.i, i8 %v_24.i.i, i32 24
+ %dst_25.i.i = insertelement <64 x i8> %dst_24.i.i, i8 %v_25.i.i, i32 25
+ %dst_26.i.i = insertelement <64 x i8> %dst_25.i.i, i8 %v_26.i.i, i32 26
+ %dst_27.i.i = insertelement <64 x i8> %dst_26.i.i, i8 %v_27.i.i, i32 27
+ %dst_28.i.i = insertelement <64 x i8> %dst_27.i.i, i8 %v_28.i.i, i32 28
+ %dst_29.i.i = insertelement <64 x i8> %dst_28.i.i, i8 %v_29.i.i, i32 29
+ %dst_30.i.i = insertelement <64 x i8> %dst_29.i.i, i8 %v_30.i.i, i32 30
+ %dst_31.i.i = insertelement <64 x i8> %dst_30.i.i, i8 %v_31.i.i, i32 31
+ %dst_32.i.i = insertelement <64 x i8> %dst_31.i.i, i8 %v_32.i.i, i32 32
+ %dst_33.i.i = insertelement <64 x i8> %dst_32.i.i, i8 %v_33.i.i, i32 33
+ %dst_34.i.i = insertelement <64 x i8> %dst_33.i.i, i8 %v_34.i.i, i32 34
+ %dst_35.i.i = insertelement <64 x i8> %dst_34.i.i, i8 %v_35.i.i, i32 35
+ %dst_36.i.i = insertelement <64 x i8> %dst_35.i.i, i8 %v_36.i.i, i32 36
+ %dst_37.i.i = insertelement <64 x i8> %dst_36.i.i, i8 %v_37.i.i, i32 37
+ %dst_38.i.i = insertelement <64 x i8> %dst_37.i.i, i8 %v_38.i.i, i32 38
+ %dst_39.i.i = insertelement <64 x i8> %dst_38.i.i, i8 %v_39.i.i, i32 39
+ %dst_40.i.i = insertelement <64 x i8> %dst_39.i.i, i8 %v_40.i.i, i32 40
+ %dst_41.i.i = insertelement <64 x i8> %dst_40.i.i, i8 %v_41.i.i, i32 41
+ %dst_42.i.i = insertelement <64 x i8> %dst_41.i.i, i8 %v_42.i.i, i32 42
+ %dst_43.i.i = insertelement <64 x i8> %dst_42.i.i, i8 %v_43.i.i, i32 43
+ %dst_44.i.i = insertelement <64 x i8> %dst_43.i.i, i8 %v_44.i.i, i32 44
+ %dst_45.i.i = insertelement <64 x i8> %dst_44.i.i, i8 %v_45.i.i, i32 45
+ %dst_46.i.i = insertelement <64 x i8> %dst_45.i.i, i8 %v_46.i.i, i32 46
+ %dst_47.i.i = insertelement <64 x i8> %dst_46.i.i, i8 %v_47.i.i, i32 47
+ %dst_48.i.i = insertelement <64 x i8> %dst_47.i.i, i8 %v_48.i.i, i32 48
+ %dst_49.i.i = insertelement <64 x i8> %dst_48.i.i, i8 %v_49.i.i, i32 49
+ %dst_50.i.i = insertelement <64 x i8> %dst_49.i.i, i8 %v_50.i.i, i32 50
+ %dst_51.i.i = insertelement <64 x i8> %dst_50.i.i, i8 %v_51.i.i, i32 51
+ %dst_52.i.i = insertelement <64 x i8> %dst_51.i.i, i8 %v_52.i.i, i32 52
+ %dst_53.i.i = insertelement <64 x i8> %dst_52.i.i, i8 %v_53.i.i, i32 53
+ %dst_54.i.i = insertelement <64 x i8> %dst_53.i.i, i8 %v_54.i.i, i32 54
+ %dst_55.i.i = insertelement <64 x i8> %dst_54.i.i, i8 %v_55.i.i, i32 55
+ %dst_56.i.i = insertelement <64 x i8> %dst_55.i.i, i8 %v_56.i.i, i32 56
+ %dst_57.i.i = insertelement <64 x i8> %dst_56.i.i, i8 %v_57.i.i, i32 57
+ %dst_58.i.i = insertelement <64 x i8> %dst_57.i.i, i8 %v_58.i.i, i32 58
+ %dst_59.i.i = insertelement <64 x i8> %dst_58.i.i, i8 %v_59.i.i, i32 59
+ %dst_60.i.i = insertelement <64 x i8> %dst_59.i.i, i8 %v_60.i.i, i32 60
+ %dst_61.i.i = insertelement <64 x i8> %dst_60.i.i, i8 %v_61.i.i, i32 61
+ %dst_62.i.i = insertelement <64 x i8> %dst_61.i.i, i8 %v_62.i.i, i32 62
+ %dst_63.i.i = insertelement <64 x i8> %dst_62.i.i, i8 %v_63.i.i, i32 63
+ %shuf_load_to_float = sitofp <64 x i8> %dst_63.i.i to <64 x float>
+ store <64 x float> %shuf_load_to_float, <64 x float>* %dst
+ ret void
+}