[llvm] 3b6aec7 - [X86] Add test cases for v4i64->v4f32 and v8i64->v8f32 strict_sint_to_fp/strict_uint_to_fp to vec-strict-inttofp-256.ll and vec-strict-inttofp-512.ll. NFC

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 28 11:21:37 PST 2019


Author: Craig Topper
Date: 2019-12-28T11:20:45-08:00
New Revision: 3b6aec79b2cb9b114b0624270bb7129c52ec69ce

URL: https://github.com/llvm/llvm-project/commit/3b6aec79b2cb9b114b0624270bb7129c52ec69ce
DIFF: https://github.com/llvm/llvm-project/commit/3b6aec79b2cb9b114b0624270bb7129c52ec69ce.diff

LOG: [X86] Add test cases for v4i64->v4f32 and v8i64->v8f32 strict_sint_to_fp/strict_uint_to_fp to vec-strict-inttofp-256.ll and vec-strict-inttofp-512.ll. NFC
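The new tests all follow the same shape: declare the constrained conversion intrinsic, call it with dynamic rounding and strict exception semantics, and mark the function strictfp. Distilled from the diff below, the signed v4i64->v4f32 case is:

    declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)

    define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
      %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
                                                      metadata !"round.dynamic",
                                                      metadata !"fpexcept.strict") #0
      ret <4 x float> %result
    }

    attributes #0 = { strictfp }

The unsigned and 512-bit variants differ only in the intrinsic name (uitofp) and the vector types (v8f32.v8i64).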

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
    llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
index 683e901ae54b..d61325e527ea 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
@@ -28,6 +28,8 @@ declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>
 declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
 
 define <8 x float> @sitofp_v8i1_v8f32(<8 x i1> %x) #0 {
 ; CHECK-LABEL: sitofp_v8i1_v8f32:
@@ -515,4 +517,244 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
   ret <4 x double> %result
 }
 
+define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
+; AVX-32-LABEL: sitofp_v4i64_v4f32:
+; AVX-32:       # %bb.0:
+; AVX-32-NEXT:    pushl %ebp
+; AVX-32-NEXT:    .cfi_def_cfa_offset 8
+; AVX-32-NEXT:    .cfi_offset %ebp, -8
+; AVX-32-NEXT:    movl %esp, %ebp
+; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
+; AVX-32-NEXT:    andl $-8, %esp
+; AVX-32-NEXT:    subl $48, %esp
+; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fstps (%esp)
+; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; AVX-32-NEXT:    movl %ebp, %esp
+; AVX-32-NEXT:    popl %ebp
+; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
+; AVX-32-NEXT:    vzeroupper
+; AVX-32-NEXT:    retl
+;
+; AVX-64-LABEL: sitofp_v4i64_v4f32:
+; AVX-64:       # %bb.0:
+; AVX-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX-64-NEXT:    vmovq %xmm0, %rax
+; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX-64-NEXT:    vmovq %xmm0, %rax
+; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX-64-NEXT:    vzeroupper
+; AVX-64-NEXT:    retq
+;
+; AVX512DQ-LABEL: sitofp_v4i64_v4f32:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
+; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: sitofp_v4i64_v4f32:
+; AVX512DQVL:       # %bb.0:
+; AVX512DQVL-NEXT:    vcvtqq2ps %ymm0, %xmm0
+; AVX512DQVL-NEXT:    vzeroupper
+; AVX512DQVL-NEXT:    ret{{[l|q]}}
+ %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <4 x float> %result
+}
+
+define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
+; AVX-32-LABEL: uitofp_v4i64_v4f32:
+; AVX-32:       # %bb.0:
+; AVX-32-NEXT:    pushl %ebp
+; AVX-32-NEXT:    .cfi_def_cfa_offset 8
+; AVX-32-NEXT:    .cfi_offset %ebp, -8
+; AVX-32-NEXT:    movl %esp, %ebp
+; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
+; AVX-32-NEXT:    andl $-8, %esp
+; AVX-32-NEXT:    subl $48, %esp
+; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; AVX-32-NEXT:    vmovlps %xmm2, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vextractps $1, %xmm0, %eax
+; AVX-32-NEXT:    xorl %ecx, %ecx
+; AVX-32-NEXT:    testl %eax, %eax
+; AVX-32-NEXT:    setns %cl
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT:    fstps (%esp)
+; AVX-32-NEXT:    vextractps $3, %xmm0, %eax
+; AVX-32-NEXT:    xorl %ecx, %ecx
+; AVX-32-NEXT:    testl %eax, %eax
+; AVX-32-NEXT:    setns %cl
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vextractps $1, %xmm1, %eax
+; AVX-32-NEXT:    xorl %ecx, %ecx
+; AVX-32-NEXT:    testl %eax, %eax
+; AVX-32-NEXT:    setns %cl
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vextractps $3, %xmm1, %eax
+; AVX-32-NEXT:    xorl %ecx, %ecx
+; AVX-32-NEXT:    testl %eax, %eax
+; AVX-32-NEXT:    setns %cl
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; AVX-32-NEXT:    movl %ebp, %esp
+; AVX-32-NEXT:    popl %ebp
+; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
+; AVX-32-NEXT:    vzeroupper
+; AVX-32-NEXT:    retl
+;
+; AVX1-64-LABEL: uitofp_v4i64_v4f32:
+; AVX1-64:       # %bb.0:
+; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-64-NEXT:    movq %rax, %rcx
+; AVX1-64-NEXT:    shrq %rcx
+; AVX1-64-NEXT:    movl %eax, %edx
+; AVX1-64-NEXT:    andl $1, %edx
+; AVX1-64-NEXT:    orq %rcx, %rdx
+; AVX1-64-NEXT:    testq %rax, %rax
+; AVX1-64-NEXT:    cmovnsq %rax, %rdx
+; AVX1-64-NEXT:    vcvtsi2ss %rdx, %xmm1, %xmm1
+; AVX1-64-NEXT:    jns .LBB19_2
+; AVX1-64-NEXT:  # %bb.1:
+; AVX1-64-NEXT:    vaddss %xmm1, %xmm1, %xmm1
+; AVX1-64-NEXT:  .LBB19_2:
+; AVX1-64-NEXT:    vmovq %xmm0, %rax
+; AVX1-64-NEXT:    movq %rax, %rcx
+; AVX1-64-NEXT:    shrq %rcx
+; AVX1-64-NEXT:    movl %eax, %edx
+; AVX1-64-NEXT:    andl $1, %edx
+; AVX1-64-NEXT:    orq %rcx, %rdx
+; AVX1-64-NEXT:    testq %rax, %rax
+; AVX1-64-NEXT:    cmovnsq %rax, %rdx
+; AVX1-64-NEXT:    vcvtsi2ss %rdx, %xmm2, %xmm2
+; AVX1-64-NEXT:    jns .LBB19_4
+; AVX1-64-NEXT:  # %bb.3:
+; AVX1-64-NEXT:    vaddss %xmm2, %xmm2, %xmm2
+; AVX1-64-NEXT:  .LBB19_4:
+; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX1-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX1-64-NEXT:    vmovq %xmm0, %rax
+; AVX1-64-NEXT:    movq %rax, %rcx
+; AVX1-64-NEXT:    shrq %rcx
+; AVX1-64-NEXT:    movl %eax, %edx
+; AVX1-64-NEXT:    andl $1, %edx
+; AVX1-64-NEXT:    orq %rcx, %rdx
+; AVX1-64-NEXT:    testq %rax, %rax
+; AVX1-64-NEXT:    cmovnsq %rax, %rdx
+; AVX1-64-NEXT:    vcvtsi2ss %rdx, %xmm3, %xmm2
+; AVX1-64-NEXT:    jns .LBB19_6
+; AVX1-64-NEXT:  # %bb.5:
+; AVX1-64-NEXT:    vaddss %xmm2, %xmm2, %xmm2
+; AVX1-64-NEXT:  .LBB19_6:
+; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-64-NEXT:    movq %rax, %rcx
+; AVX1-64-NEXT:    shrq %rcx
+; AVX1-64-NEXT:    movl %eax, %edx
+; AVX1-64-NEXT:    andl $1, %edx
+; AVX1-64-NEXT:    orq %rcx, %rdx
+; AVX1-64-NEXT:    testq %rax, %rax
+; AVX1-64-NEXT:    cmovnsq %rax, %rdx
+; AVX1-64-NEXT:    vcvtsi2ss %rdx, %xmm3, %xmm0
+; AVX1-64-NEXT:    jns .LBB19_8
+; AVX1-64-NEXT:  # %bb.7:
+; AVX1-64-NEXT:    vaddss %xmm0, %xmm0, %xmm0
+; AVX1-64-NEXT:  .LBB19_8:
+; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX1-64-NEXT:    vzeroupper
+; AVX1-64-NEXT:    retq
+;
+; AVX512F-64-LABEL: uitofp_v4i64_v4f32:
+; AVX512F-64:       # %bb.0:
+; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
+; AVX512F-64-NEXT:    vmovq %xmm0, %rax
+; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512F-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512F-64-NEXT:    vmovq %xmm0, %rax
+; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
+; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512F-64-NEXT:    vzeroupper
+; AVX512F-64-NEXT:    retq
+;
+; AVX512VL-64-LABEL: uitofp_v4i64_v4f32:
+; AVX512VL-64:       # %bb.0:
+; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
+; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
+; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512VL-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
+; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
+; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512VL-64-NEXT:    vzeroupper
+; AVX512VL-64-NEXT:    retq
+;
+; AVX512DQ-LABEL: uitofp_v4i64_v4f32:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
+; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: uitofp_v4i64_v4f32:
+; AVX512DQVL:       # %bb.0:
+; AVX512DQVL-NEXT:    vcvtuqq2ps %ymm0, %xmm0
+; AVX512DQVL-NEXT:    vzeroupper
+; AVX512DQVL-NEXT:    ret{{[l|q]}}
+ %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <4 x float> %result
+}
+
 attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
index 668d960ade74..e3ee8da0bb86 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
@@ -22,6 +22,8 @@ declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i32(<8 x i32>
 declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i32(<8 x i32>, metadata, metadata)
 declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64>, metadata, metadata)
 declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64>, metadata, metadata)
+declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64>, metadata, metadata)
+declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64>, metadata, metadata)
 
 define <16 x float> @sitofp_v16i1_v16f32(<16 x i1> %x) #0 {
 ; CHECK-LABEL: sitofp_v16i1_v16f32:
@@ -387,4 +389,234 @@ define <8 x double> @uitofp_v8i64_v8f64(<8 x i64> %x) #0 {
   ret <8 x double> %result
 }
 
+define <8 x float> @sitofp_v8i64_v8f32(<8 x i64> %x) #0 {
+; NODQ-32-LABEL: sitofp_v8i64_v8f32:
+; NODQ-32:       # %bb.0:
+; NODQ-32-NEXT:    pushl %ebp
+; NODQ-32-NEXT:    .cfi_def_cfa_offset 8
+; NODQ-32-NEXT:    .cfi_offset %ebp, -8
+; NODQ-32-NEXT:    movl %esp, %ebp
+; NODQ-32-NEXT:    .cfi_def_cfa_register %ebp
+; NODQ-32-NEXT:    andl $-8, %esp
+; NODQ-32-NEXT:    subl $96, %esp
+; NODQ-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; NODQ-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; NODQ-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; NODQ-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
+; NODQ-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; NODQ-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractf32x4 $3, %zmm0, %xmm0
+; NODQ-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; NODQ-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fstps (%esp)
+; NODQ-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; NODQ-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; NODQ-32-NEXT:    movl %ebp, %esp
+; NODQ-32-NEXT:    popl %ebp
+; NODQ-32-NEXT:    .cfi_def_cfa %esp, 4
+; NODQ-32-NEXT:    retl
+;
+; NODQ-64-LABEL: sitofp_v8i64_v8f32:
+; NODQ-64:       # %bb.0:
+; NODQ-64-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; NODQ-64-NEXT:    vpextrq $1, %xmm1, %rax
+; NODQ-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
+; NODQ-64-NEXT:    vmovq %xmm1, %rax
+; NODQ-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; NODQ-64-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; NODQ-64-NEXT:    vmovq %xmm2, %rax
+; NODQ-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm3
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; NODQ-64-NEXT:    vpextrq $1, %xmm2, %rax
+; NODQ-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm2
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; NODQ-64-NEXT:    vpextrq $1, %xmm0, %rax
+; NODQ-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm2
+; NODQ-64-NEXT:    vmovq %xmm0, %rax
+; NODQ-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; NODQ-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; NODQ-64-NEXT:    vmovq %xmm0, %rax
+; NODQ-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; NODQ-64-NEXT:    vpextrq $1, %xmm0, %rax
+; NODQ-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm0
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; NODQ-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; NODQ-64-NEXT:    retq
+;
+; DQ-LABEL: sitofp_v8i64_v8f32:
+; DQ:       # %bb.0:
+; DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
+; DQ-NEXT:    ret{{[l|q]}}
+ %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x float> %result
+}
+
+define <8 x float> @uitofp_v8i64_v8f32(<8 x i64> %x) #0 {
+; NODQ-32-LABEL: uitofp_v8i64_v8f32:
+; NODQ-32:       # %bb.0:
+; NODQ-32-NEXT:    pushl %ebp
+; NODQ-32-NEXT:    .cfi_def_cfa_offset 8
+; NODQ-32-NEXT:    .cfi_offset %ebp, -8
+; NODQ-32-NEXT:    movl %esp, %ebp
+; NODQ-32-NEXT:    .cfi_def_cfa_register %ebp
+; NODQ-32-NEXT:    andl $-8, %esp
+; NODQ-32-NEXT:    subl $96, %esp
+; NODQ-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; NODQ-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; NODQ-32-NEXT:    vmovlps %xmm3, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm3[2,3,0,1]
+; NODQ-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
+; NODQ-32-NEXT:    vmovlps %xmm2, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; NODQ-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
+; NODQ-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vpermilps {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; NODQ-32-NEXT:    vmovlps %xmm4, {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractps $1, %xmm0, %eax
+; NODQ-32-NEXT:    xorl %ecx, %ecx
+; NODQ-32-NEXT:    testl %eax, %eax
+; NODQ-32-NEXT:    setns %cl
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT:    fstps (%esp)
+; NODQ-32-NEXT:    vextractps $3, %xmm0, %eax
+; NODQ-32-NEXT:    xorl %ecx, %ecx
+; NODQ-32-NEXT:    testl %eax, %eax
+; NODQ-32-NEXT:    setns %cl
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractps $1, %xmm3, %eax
+; NODQ-32-NEXT:    xorl %ecx, %ecx
+; NODQ-32-NEXT:    testl %eax, %eax
+; NODQ-32-NEXT:    setns %cl
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractps $3, %xmm3, %eax
+; NODQ-32-NEXT:    xorl %ecx, %ecx
+; NODQ-32-NEXT:    testl %eax, %eax
+; NODQ-32-NEXT:    setns %cl
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractps $1, %xmm2, %eax
+; NODQ-32-NEXT:    xorl %ecx, %ecx
+; NODQ-32-NEXT:    testl %eax, %eax
+; NODQ-32-NEXT:    setns %cl
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractps $3, %xmm2, %eax
+; NODQ-32-NEXT:    xorl %ecx, %ecx
+; NODQ-32-NEXT:    testl %eax, %eax
+; NODQ-32-NEXT:    setns %cl
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractps $1, %xmm1, %eax
+; NODQ-32-NEXT:    xorl %ecx, %ecx
+; NODQ-32-NEXT:    testl %eax, %eax
+; NODQ-32-NEXT:    setns %cl
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vextractps $3, %xmm1, %eax
+; NODQ-32-NEXT:    xorl %ecx, %ecx
+; NODQ-32-NEXT:    testl %eax, %eax
+; NODQ-32-NEXT:    setns %cl
+; NODQ-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; NODQ-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; NODQ-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
+; NODQ-32-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; NODQ-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; NODQ-32-NEXT:    movl %ebp, %esp
+; NODQ-32-NEXT:    popl %ebp
+; NODQ-32-NEXT:    .cfi_def_cfa %esp, 4
+; NODQ-32-NEXT:    retl
+;
+; NODQ-64-LABEL: uitofp_v8i64_v8f32:
+; NODQ-64:       # %bb.0:
+; NODQ-64-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; NODQ-64-NEXT:    vpextrq $1, %xmm1, %rax
+; NODQ-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
+; NODQ-64-NEXT:    vmovq %xmm1, %rax
+; NODQ-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm1
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; NODQ-64-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; NODQ-64-NEXT:    vmovq %xmm2, %rax
+; NODQ-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm3
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; NODQ-64-NEXT:    vpextrq $1, %xmm2, %rax
+; NODQ-64-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm2
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; NODQ-64-NEXT:    vpextrq $1, %xmm0, %rax
+; NODQ-64-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm2
+; NODQ-64-NEXT:    vmovq %xmm0, %rax
+; NODQ-64-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm3
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; NODQ-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; NODQ-64-NEXT:    vmovq %xmm0, %rax
+; NODQ-64-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm3
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; NODQ-64-NEXT:    vpextrq $1, %xmm0, %rax
+; NODQ-64-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm0
+; NODQ-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; NODQ-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; NODQ-64-NEXT:    retq
+;
+; DQ-LABEL: uitofp_v8i64_v8f32:
+; DQ:       # %bb.0:
+; DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
+; DQ-NEXT:    ret{{[l|q]}}
+ %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x float> %result
+}
+
 attributes #0 = { strictfp }
