[llvm] [X86] Add ldexp test coverage for avx512 targets (PR #165698)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 30 04:33:50 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Pulled out of the abandoned patch #<!-- -->69710 to act as a baseline for #<!-- -->165694
---
Patch is 23.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165698.diff
1 Files Affected:
- (added) llvm/test/CodeGen/X86/ldexp-avx512.ll (+467)
``````````diff
diff --git a/llvm/test/CodeGen/X86/ldexp-avx512.ll b/llvm/test/CodeGen/X86/ldexp-avx512.ll
new file mode 100644
index 0000000000000..ea93a911a1ad0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ldexp-avx512.ll
@@ -0,0 +1,467 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512VL
+
+define half @test_half(half %x, i32 %exp) nounwind {
+; CHECK-LABEL: test_half:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+entry:
+ %r = tail call fast half @llvm.ldexp.f16.i32(half %x, i32 %exp)
+ ret half %r
+}
+declare half @llvm.ldexp.f16.i32(half, i32) memory(none)
+
+define float @test_float(float %x, i32 %exp) nounwind {
+; CHECK-LABEL: test_float:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: jmp ldexpf at PLT # TAILCALL
+entry:
+ %r = tail call fast float @ldexpf(float %x, i32 %exp)
+ ret float %r
+}
+declare float @ldexpf(float, i32) memory(none)
+
+define double @test_double(double %x, i32 %exp) nounwind {
+; CHECK-LABEL: test_double:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: jmp ldexp at PLT # TAILCALL
+entry:
+ %r = tail call fast double @ldexp(double %x, i32 %exp)
+ ret double %r
+}
+declare double @ldexp(double, i32) memory(none)
+
+define fp128 @testExpl(fp128 %x, i32 %exp) nounwind {
+; CHECK-LABEL: testExpl:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: jmp ldexpl at PLT # TAILCALL
+entry:
+ %r = tail call fast fp128 @ldexpl(fp128 %x, i32 %exp)
+ ret fp128 %r
+}
+declare fp128 @ldexpl(fp128, i32) memory(none)
+
+define <4 x float> @test_ldexp_4xfloat(<4 x float> %x, <4 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_4xfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm1, %edi
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: retq
+ %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %x, <4 x i32> %exp)
+ ret <4 x float> %r
+}
+declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)
+
+define <2 x double> @test_ldexp_2xdouble(<2 x double> %x, <2 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_2xdouble:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm1, %edi
+; CHECK-NEXT: callq ldexp at PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexp at PLT
+; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: retq
+ %r = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> %x, <2 x i32> %exp)
+ ret <2 x double> %r
+}
+declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>)
+
+define <8 x float> @test_ldexp_8xfloat(<8 x float> %x, <8 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_8xfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $120, %rsp
+; CHECK-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
+; CHECK-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm1, %edi
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $120, %rsp
+; CHECK-NEXT: retq
+ %r = call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> %x, <8 x i32> %exp)
+ ret <8 x float> %r
+}
+declare <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float>, <8 x i32>)
+
+define <4 x double> @test_ldexp_4xdouble(<4 x double> %x, <4 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_4xdouble:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $88, %rsp
+; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vextractps $2, %xmm1, %edi
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexp at PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexp at PLT
+; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexp at PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexp at PLT
+; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $88, %rsp
+; CHECK-NEXT: retq
+ %r = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> %x, <4 x i32> %exp)
+ ret <4 x double> %r
+}
+declare <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>)
+
+define <16 x float> @test_ldexp_16xfloat(<16 x float> %x, <16 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_16xfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $216, %rsp
+; CHECK-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; CHECK-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vextracti32x4 $3, %zmm1, %xmm1
+; CHECK-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm1, %edi
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf at PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: vinsertf64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; CHECK-NEXT: addq $216, %rsp
+; CHECK-NEXT: retq
+ %r = call <16 x float> @llvm.ldexp.v16f32.v16i32(<16 x float> %x, <16 x i32> %exp)
+ ret <16 x float> %r
+}
+declare <16 x float> @llvm.ldexp.v16f32.v16i32(<16 x float>, <16 x i32>)
+
+define <8 x double> @test_ldexp_8xdouble(<8 x double> %x, <8 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_8xdouble:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $184, %rsp
+; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vextractf128 $1...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/165698
More information about the llvm-commits
mailing list