[llvm] [X86] ldexp-avx512.ll - add v8f16/v16f16/v32f16 test coverage for #165694 (PR #167294)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 10 02:26:38 PST 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/167294
None
>From 76a92d548fa1b232712610eca663a1ff62013be0 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 10 Nov 2025 10:25:49 +0000
Subject: [PATCH] [X86] ldexp-avx512.ll - add v8f16/v16f16/v32f16 test coverage
for #165694
---
llvm/test/CodeGen/X86/ldexp-avx512.ll | 1288 ++++++++++++++++++++++++-
1 file changed, 1285 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/X86/ldexp-avx512.ll b/llvm/test/CodeGen/X86/ldexp-avx512.ll
index ea93a911a1ad0..21491bc2cc8f5 100644
--- a/llvm/test/CodeGen/X86/ldexp-avx512.ll
+++ b/llvm/test/CodeGen/X86/ldexp-avx512.ll
@@ -47,6 +47,187 @@ entry:
}
declare fp128 @ldexpl(fp128, i32) memory(none)
+define <8 x half> @test_ldexp_8xhalf(<8 x half> %x, <8 x i16> %exp) nounwind {
+; AVX512-LABEL: test_ldexp_8xhalf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $88, %rsp
+; AVX512-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm2, %xmm0
+; AVX512-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT: addq $88, %rsp
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: test_ldexp_8xhalf:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: subq $88, %rsp
+; AVX512VL-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm0
+; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,0]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovd %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512VL-NEXT: addq $88, %rsp
+; AVX512VL-NEXT: retq
+ %r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> %x, <8 x i16> %exp)
+ ret <8 x half> %r
+}
+declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>)
+
define <4 x float> @test_ldexp_4xfloat(<4 x float> %x, <4 x i32> %exp) nounwind {
; CHECK-LABEL: test_ldexp_4xfloat:
; CHECK: # %bb.0:
@@ -109,6 +290,381 @@ define <2 x double> @test_ldexp_2xdouble(<2 x double> %x, <2 x i32> %exp) nounwi
}
declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>)
+define <16 x half> @test_ldexp_16xhalf(<16 x half> %x, <16 x i16> %exp) nounwind {
+; AVX512-LABEL: test_ldexp_16xhalf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $168, %rsp
+; AVX512-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX512-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; AVX512-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
+; AVX512-NEXT: addq $168, %rsp
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: test_ldexp_16xhalf:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: subq $168, %rsp
+; AVX512VL-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512VL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX512VL-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,0]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,0]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovd %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512VL-NEXT: vmovd %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
+; AVX512VL-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
+; AVX512VL-NEXT: addq $168, %rsp
+; AVX512VL-NEXT: retq
+ %r = call <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half> %x, <16 x i16> %exp)
+ ret <16 x half> %r
+}
+declare <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half>, <16 x i16>)
+
define <8 x float> @test_ldexp_8xfloat(<8 x float> %x, <8 x i32> %exp) nounwind {
; CHECK-LABEL: test_ldexp_8xfloat:
; CHECK: # %bb.0:
@@ -230,6 +786,735 @@ define <4 x double> @test_ldexp_4xdouble(<4 x double> %x, <4 x i32> %exp) nounwi
}
declare <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>)
+define <32 x half> @test_ldexp_32xhalf(<32 x half> %x, <32 x i16> %exp) nounwind {
+; AVX512-LABEL: test_ldexp_32xhalf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $360, %rsp # imm = 0x168
+; AVX512-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vextracti32x4 $3, %zmm1, %xmm1
+; AVX512-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512-NEXT: vextracti32x4 $2, %zmm1, %xmm1
+; AVX512-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilps $255, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX512-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vcvtph2ps (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf at PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload
+; AVX512-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; AVX512-NEXT: addq $360, %rsp # imm = 0x168
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: test_ldexp_32xhalf:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: subq $360, %rsp # imm = 0x168
+; AVX512VL-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512VL-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vextracti32x4 $3, %zmm1, %xmm1
+; AVX512VL-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,0]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512VL-NEXT: vextracti32x4 $2, %zmm1, %xmm1
+; AVX512VL-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilps $255, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,0]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX512VL-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,0]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,0]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512VL-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovd %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrlq $48, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrld $16, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vcvtph2ps (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovd %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovd %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
+; AVX512VL-NEXT: vmovd %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: callq ldexpf at PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512VL-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload
+; AVX512VL-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; AVX512VL-NEXT: addq $360, %rsp # imm = 0x168
+; AVX512VL-NEXT: retq
+ %r = call <32 x half> @llvm.ldexp.v32f16.v32i16(<32 x half> %x, <32 x i16> %exp)
+ ret <32 x half> %r
+}
+declare <32 x half> @llvm.ldexp.v32f16.v32i16(<32 x half>, <32 x i16>)
+
define <16 x float> @test_ldexp_16xfloat(<16 x float> %x, <16 x i32> %exp) nounwind {
; CHECK-LABEL: test_ldexp_16xfloat:
; CHECK: # %bb.0:
@@ -462,6 +1747,3 @@ define <8 x double> @test_ldexp_8xdouble(<8 x double> %x, <8 x i32> %exp) nounwi
}
declare <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double>, <8 x i32>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX512: {{.*}}
-; AVX512VL: {{.*}}
More information about the llvm-commits
mailing list