[llvm] 3b6aec7 - [X86] Add test cases for v4i64->v4f32 and v8i64->v8f32 strict_sint_to_fp/strict_uint_to_fp to vec-strict-inttofp-256.ll and vec-strict-inttofp-512.ll. NFC
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 28 11:21:37 PST 2019
Author: Craig Topper
Date: 2019-12-28T11:20:45-08:00
New Revision: 3b6aec79b2cb9b114b0624270bb7129c52ec69ce
URL: https://github.com/llvm/llvm-project/commit/3b6aec79b2cb9b114b0624270bb7129c52ec69ce
DIFF: https://github.com/llvm/llvm-project/commit/3b6aec79b2cb9b114b0624270bb7129c52ec69ce.diff
LOG: [X86] Add test cases for v4i64->v4f32 and v8i64->v8f32 strict_sint_to_fp/strict_uint_to_fp to vec-strict-inttofp-256.ll and vec-strict-inttofp-512.ll. NFC
Added:
Modified:
llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
index 683e901ae54b..d61325e527ea 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
@@ -28,6 +28,8 @@ declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
define <8 x float> @sitofp_v8i1_v8f32(<8 x i1> %x) #0 {
; CHECK-LABEL: sitofp_v8i1_v8f32:
@@ -515,4 +517,244 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
ret <4 x double> %result
}
+define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
+; AVX-32-LABEL: sitofp_v4i64_v4f32:
+; AVX-32: # %bb.0:
+; AVX-32-NEXT: pushl %ebp
+; AVX-32-NEXT: .cfi_def_cfa_offset 8
+; AVX-32-NEXT: .cfi_offset %ebp, -8
+; AVX-32-NEXT: movl %esp, %ebp
+; AVX-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX-32-NEXT: andl $-8, %esp
+; AVX-32-NEXT: subl $48, %esp
+; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fstps (%esp)
+; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; AVX-32-NEXT: movl %ebp, %esp
+; AVX-32-NEXT: popl %ebp
+; AVX-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX-32-NEXT: vzeroupper
+; AVX-32-NEXT: retl
+;
+; AVX-64-LABEL: sitofp_v4i64_v4f32:
+; AVX-64: # %bb.0:
+; AVX-64-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX-64-NEXT: vmovq %xmm0, %rax
+; AVX-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX-64-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX-64-NEXT: vmovq %xmm0, %rax
+; AVX-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX-64-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX-64-NEXT: vzeroupper
+; AVX-64-NEXT: retq
+;
+; AVX512DQ-LABEL: sitofp_v4i64_v4f32:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
+; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: sitofp_v4i64_v4f32:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvtqq2ps %ymm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
+ %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x float> %result
+}
+
+define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
+; AVX-32-LABEL: uitofp_v4i64_v4f32:
+; AVX-32: # %bb.0:
+; AVX-32-NEXT: pushl %ebp
+; AVX-32-NEXT: .cfi_def_cfa_offset 8
+; AVX-32-NEXT: .cfi_offset %ebp, -8
+; AVX-32-NEXT: movl %esp, %ebp
+; AVX-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX-32-NEXT: andl $-8, %esp
+; AVX-32-NEXT: subl $48, %esp
+; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; AVX-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: vextractps $1, %xmm0, %eax
+; AVX-32-NEXT: xorl %ecx, %ecx
+; AVX-32-NEXT: testl %eax, %eax
+; AVX-32-NEXT: setns %cl
+; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT: fstps (%esp)
+; AVX-32-NEXT: vextractps $3, %xmm0, %eax
+; AVX-32-NEXT: xorl %ecx, %ecx
+; AVX-32-NEXT: testl %eax, %eax
+; AVX-32-NEXT: setns %cl
+; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: vextractps $1, %xmm1, %eax
+; AVX-32-NEXT: xorl %ecx, %ecx
+; AVX-32-NEXT: testl %eax, %eax
+; AVX-32-NEXT: setns %cl
+; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: vextractps $3, %xmm1, %eax
+; AVX-32-NEXT: xorl %ecx, %ecx
+; AVX-32-NEXT: testl %eax, %eax
+; AVX-32-NEXT: setns %cl
+; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; AVX-32-NEXT: movl %ebp, %esp
+; AVX-32-NEXT: popl %ebp
+; AVX-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX-32-NEXT: vzeroupper
+; AVX-32-NEXT: retl
+;
+; AVX1-64-LABEL: uitofp_v4i64_v4f32:
+; AVX1-64: # %bb.0:
+; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-64-NEXT: movq %rax, %rcx
+; AVX1-64-NEXT: shrq %rcx
+; AVX1-64-NEXT: movl %eax, %edx
+; AVX1-64-NEXT: andl $1, %edx
+; AVX1-64-NEXT: orq %rcx, %rdx
+; AVX1-64-NEXT: testq %rax, %rax
+; AVX1-64-NEXT: cmovnsq %rax, %rdx
+; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
+; AVX1-64-NEXT: jns .LBB19_2
+; AVX1-64-NEXT: # %bb.1:
+; AVX1-64-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX1-64-NEXT: .LBB19_2:
+; AVX1-64-NEXT: vmovq %xmm0, %rax
+; AVX1-64-NEXT: movq %rax, %rcx
+; AVX1-64-NEXT: shrq %rcx
+; AVX1-64-NEXT: movl %eax, %edx
+; AVX1-64-NEXT: andl $1, %edx
+; AVX1-64-NEXT: orq %rcx, %rdx
+; AVX1-64-NEXT: testq %rax, %rax
+; AVX1-64-NEXT: cmovnsq %rax, %rdx
+; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
+; AVX1-64-NEXT: jns .LBB19_4
+; AVX1-64-NEXT: # %bb.3:
+; AVX1-64-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-64-NEXT: .LBB19_4:
+; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX1-64-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX1-64-NEXT: vmovq %xmm0, %rax
+; AVX1-64-NEXT: movq %rax, %rcx
+; AVX1-64-NEXT: shrq %rcx
+; AVX1-64-NEXT: movl %eax, %edx
+; AVX1-64-NEXT: andl $1, %edx
+; AVX1-64-NEXT: orq %rcx, %rdx
+; AVX1-64-NEXT: testq %rax, %rax
+; AVX1-64-NEXT: cmovnsq %rax, %rdx
+; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm2
+; AVX1-64-NEXT: jns .LBB19_6
+; AVX1-64-NEXT: # %bb.5:
+; AVX1-64-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-64-NEXT: .LBB19_6:
+; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-64-NEXT: movq %rax, %rcx
+; AVX1-64-NEXT: shrq %rcx
+; AVX1-64-NEXT: movl %eax, %edx
+; AVX1-64-NEXT: andl $1, %edx
+; AVX1-64-NEXT: orq %rcx, %rdx
+; AVX1-64-NEXT: testq %rax, %rax
+; AVX1-64-NEXT: cmovnsq %rax, %rdx
+; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
+; AVX1-64-NEXT: jns .LBB19_8
+; AVX1-64-NEXT: # %bb.7:
+; AVX1-64-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-64-NEXT: .LBB19_8:
+; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX1-64-NEXT: vzeroupper
+; AVX1-64-NEXT: retq
+;
+; AVX512F-64-LABEL: uitofp_v4i64_v4f32:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
+; AVX512F-64-NEXT: vmovq %xmm0, %rax
+; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
+; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512F-64-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-64-NEXT: vmovq %xmm0, %rax
+; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
+; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
+; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512F-64-NEXT: vzeroupper
+; AVX512F-64-NEXT: retq
+;
+; AVX512VL-64-LABEL: uitofp_v4i64_v4f32:
+; AVX512VL-64: # %bb.0:
+; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
+; AVX512VL-64-NEXT: vmovq %xmm0, %rax
+; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
+; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512VL-64-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-64-NEXT: vmovq %xmm0, %rax
+; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
+; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
+; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512VL-64-NEXT: vzeroupper
+; AVX512VL-64-NEXT: retq
+;
+; AVX512DQ-LABEL: uitofp_v4i64_v4f32:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
+; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: uitofp_v4i64_v4f32:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvtuqq2ps %ymm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
+ %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x float> %result
+}
+
attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
index 668d960ade74..e3ee8da0bb86 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
@@ -22,6 +22,8 @@ declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i32(<8 x i32>
declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i32(<8 x i32>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64>, metadata, metadata)
+declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64>, metadata, metadata)
+declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64>, metadata, metadata)
define <16 x float> @sitofp_v16i1_v16f32(<16 x i1> %x) #0 {
; CHECK-LABEL: sitofp_v16i1_v16f32:
@@ -387,4 +389,234 @@ define <8 x double> @uitofp_v8i64_v8f64(<8 x i64> %x) #0 {
ret <8 x double> %result
}
+define <8 x float> @sitofp_v8i64_v8f32(<8 x i64> %x) #0 {
+; NODQ-32-LABEL: sitofp_v8i64_v8f32:
+; NODQ-32: # %bb.0:
+; NODQ-32-NEXT: pushl %ebp
+; NODQ-32-NEXT: .cfi_def_cfa_offset 8
+; NODQ-32-NEXT: .cfi_offset %ebp, -8
+; NODQ-32-NEXT: movl %esp, %ebp
+; NODQ-32-NEXT: .cfi_def_cfa_register %ebp
+; NODQ-32-NEXT: andl $-8, %esp
+; NODQ-32-NEXT: subl $96, %esp
+; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm1
+; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1
+; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm0
+; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fstps (%esp)
+; NODQ-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; NODQ-32-NEXT: movl %ebp, %esp
+; NODQ-32-NEXT: popl %ebp
+; NODQ-32-NEXT: .cfi_def_cfa %esp, 4
+; NODQ-32-NEXT: retl
+;
+; NODQ-64-LABEL: sitofp_v8i64_v8f32:
+; NODQ-64: # %bb.0:
+; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax
+; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; NODQ-64-NEXT: vmovq %xmm1, %rax
+; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NODQ-64-NEXT: vmovq %xmm2, %rax
+; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
+; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
+; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
+; NODQ-64-NEXT: vmovq %xmm0, %rax
+; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm0
+; NODQ-64-NEXT: vmovq %xmm0, %rax
+; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
+; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; NODQ-64-NEXT: retq
+;
+; DQ-LABEL: sitofp_v8i64_v8f32:
+; DQ: # %bb.0:
+; DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
+; DQ-NEXT: ret{{[l|q]}}
+ %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x float> %result
+}
+
+define <8 x float> @uitofp_v8i64_v8f32(<8 x i64> %x) #0 {
+; NODQ-32-LABEL: uitofp_v8i64_v8f32:
+; NODQ-32: # %bb.0:
+; NODQ-32-NEXT: pushl %ebp
+; NODQ-32-NEXT: .cfi_def_cfa_offset 8
+; NODQ-32-NEXT: .cfi_offset %ebp, -8
+; NODQ-32-NEXT: movl %esp, %ebp
+; NODQ-32-NEXT: .cfi_def_cfa_register %ebp
+; NODQ-32-NEXT: andl $-8, %esp
+; NODQ-32-NEXT: subl $96, %esp
+; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm3
+; NODQ-32-NEXT: vmovlps %xmm3, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm3[2,3,0,1]
+; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm2
+; NODQ-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm1
+; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vpermilps {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; NODQ-32-NEXT: vmovlps %xmm4, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractps $1, %xmm0, %eax
+; NODQ-32-NEXT: xorl %ecx, %ecx
+; NODQ-32-NEXT: testl %eax, %eax
+; NODQ-32-NEXT: setns %cl
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT: fstps (%esp)
+; NODQ-32-NEXT: vextractps $3, %xmm0, %eax
+; NODQ-32-NEXT: xorl %ecx, %ecx
+; NODQ-32-NEXT: testl %eax, %eax
+; NODQ-32-NEXT: setns %cl
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractps $1, %xmm3, %eax
+; NODQ-32-NEXT: xorl %ecx, %ecx
+; NODQ-32-NEXT: testl %eax, %eax
+; NODQ-32-NEXT: setns %cl
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractps $3, %xmm3, %eax
+; NODQ-32-NEXT: xorl %ecx, %ecx
+; NODQ-32-NEXT: testl %eax, %eax
+; NODQ-32-NEXT: setns %cl
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractps $1, %xmm2, %eax
+; NODQ-32-NEXT: xorl %ecx, %ecx
+; NODQ-32-NEXT: testl %eax, %eax
+; NODQ-32-NEXT: setns %cl
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractps $3, %xmm2, %eax
+; NODQ-32-NEXT: xorl %ecx, %ecx
+; NODQ-32-NEXT: testl %eax, %eax
+; NODQ-32-NEXT: setns %cl
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractps $1, %xmm1, %eax
+; NODQ-32-NEXT: xorl %ecx, %ecx
+; NODQ-32-NEXT: testl %eax, %eax
+; NODQ-32-NEXT: setns %cl
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vextractps $3, %xmm1, %eax
+; NODQ-32-NEXT: xorl %ecx, %ecx
+; NODQ-32-NEXT: testl %eax, %eax
+; NODQ-32-NEXT: setns %cl
+; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; NODQ-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
+; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; NODQ-32-NEXT: movl %ebp, %esp
+; NODQ-32-NEXT: popl %ebp
+; NODQ-32-NEXT: .cfi_def_cfa %esp, 4
+; NODQ-32-NEXT: retl
+;
+; NODQ-64-LABEL: uitofp_v8i64_v8f32:
+; NODQ-64: # %bb.0:
+; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax
+; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
+; NODQ-64-NEXT: vmovq %xmm1, %rax
+; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm1
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NODQ-64-NEXT: vmovq %xmm2, %rax
+; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm3
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
+; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
+; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2
+; NODQ-64-NEXT: vmovq %xmm0, %rax
+; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm3
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm0
+; NODQ-64-NEXT: vmovq %xmm0, %rax
+; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm3
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
+; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm0
+; NODQ-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; NODQ-64-NEXT: retq
+;
+; DQ-LABEL: uitofp_v8i64_v8f32:
+; DQ: # %bb.0:
+; DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
+; DQ-NEXT: ret{{[l|q]}}
+ %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x float> %result
+}
+
attributes #0 = { strictfp }
More information about the llvm-commits mailing list