[llvm] [X86] ldexp-avx512.ll - add v8f16/v16f16/v32f16 test coverage for #165694 (PR #167294)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 10 02:27:12 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
---
Patch is 75.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167294.diff
1 File Affected:
- (modified) llvm/test/CodeGen/X86/ldexp-avx512.ll (+1285-3)
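For reference, the new coverage exercises the half-vector `llvm.ldexp` intrinsic; the v8f16 test added by the patch reduces to the IR below (copied from the diff), with the generated CHECK lines recording the current scalarized `ldexpf` libcall lowering:

```llvm
; v8f16 ldexp test from the patch: a single llvm.ldexp call with per-lane i16 exponents.
define <8 x half> @test_ldexp_8xhalf(<8 x half> %x, <8 x i16> %exp) nounwind {
  %r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> %x, <8 x i16> %exp)
  ret <8 x half> %r
}
declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>)
```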
``````````diff
diff --git a/llvm/test/CodeGen/X86/ldexp-avx512.ll b/llvm/test/CodeGen/X86/ldexp-avx512.ll
index ea93a911a1ad0..21491bc2cc8f5 100644
--- a/llvm/test/CodeGen/X86/ldexp-avx512.ll
+++ b/llvm/test/CodeGen/X86/ldexp-avx512.ll
@@ -47,6 +47,187 @@ entry:
}
declare fp128 @ldexpl(fp128, i32) memory(none)
+define <8 x half> @test_ldexp_8xhalf(<8 x half> %x, <8 x i16> %exp) nounwind {
+; AVX512-LABEL: test_ldexp_8xhalf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $88, %rsp
+; AVX512-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm2, %xmm0
+; AVX512-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT: addq $88, %rsp
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: test_ldexp_8xhalf:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: subq $88, %rsp
+; AVX512VL-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm0
+; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf@PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf@PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf@PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,0]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf@PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf@PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf@PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf@PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovd %xmm1, %eax
+; AVX512VL-NEXT: movswl %ax, %edi
+; AVX512VL-NEXT: callq ldexpf@PLT
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512VL-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX512VL-NEXT: addq $88, %rsp
+; AVX512VL-NEXT: retq
+ %r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> %x, <8 x i16> %exp)
+ ret <8 x half> %r
+}
+declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>)
+
define <4 x float> @test_ldexp_4xfloat(<4 x float> %x, <4 x i32> %exp) nounwind {
; CHECK-LABEL: test_ldexp_4xfloat:
; CHECK: # %bb.0:
@@ -109,6 +290,381 @@ define <2 x double> @test_ldexp_2xdouble(<2 x double> %x, <2 x i32> %exp) nounwi
}
declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>)
+define <16 x half> @test_ldexp_16xhalf(<16 x half> %x, <16 x i16> %exp) nounwind {
+; AVX512-LABEL: test_ldexp_16xhalf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $168, %rsp
+; AVX512-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX512-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $7, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $6, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $5, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $4, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $3, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = mem[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $2, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %edi
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq ldexpf@PLT
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512-NEXT: vpextrw $1, %xmm1, %eax
+; AVX512-NEXT: movswl %ax, %e...
[truncated]
``````````
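The diff is cut off above; for orientation only, the wider tests presumably follow the same shape as the v8f16 case. The `test_ldexp_16xhalf` signature does appear in the diff, but its body and the entire v32f16 test are outside the truncated portion, so the sketch below is an assumption extrapolated from the v8f16 pattern; see the full diff at the PR link for the actual content:

```llvm
; Sketch (not from the diff): assumed shape of the wider tests, mirroring test_ldexp_8xhalf.
define <16 x half> @test_ldexp_16xhalf(<16 x half> %x, <16 x i16> %exp) nounwind {
  %r = call <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half> %x, <16 x i16> %exp)
  ret <16 x half> %r
}
declare <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half>, <16 x i16>)

; Assumed name and body for the 512-bit case, which is not visible in the truncated diff.
define <32 x half> @test_ldexp_32xhalf(<32 x half> %x, <32 x i16> %exp) nounwind {
  %r = call <32 x half> @llvm.ldexp.v32f16.v32i16(<32 x half> %x, <32 x i16> %exp)
  ret <32 x half> %r
}
declare <32 x half> @llvm.ldexp.v32f16.v32i16(<32 x half>, <32 x i16>)
```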
https://github.com/llvm/llvm-project/pull/167294
More information about the llvm-commits mailing list