[llvm] 809c5ac - [X86] Modify tests for constrained rounding functions (#116951)
via llvm-commits
llvm-commits@lists.llvm.org
Mon Nov 25 06:39:38 PST 2024
Author: Serge Pavlov
Date: 2024-11-25T21:39:35+07:00
New Revision: 809c5ac3b0d78f504d93717ac4c0a02816cf47bb
URL: https://github.com/llvm/llvm-project/commit/809c5ac3b0d78f504d93717ac4c0a02816cf47bb
DIFF: https://github.com/llvm/llvm-project/commit/809c5ac3b0d78f504d93717ac4c0a02816cf47bb.diff
LOG: [X86] Modify tests for constrained rounding functions (#116951)
The existing tests for constrained functions often use constant
arguments. If constant evaluation is enhanced, such tests will no longer
exercise code generation for the tested functions. To avoid this, the
tests are modified to use values loaded from memory instead of
constants. For now, only the tests for the rounding functions are changed.
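
As a minimal illustration of the pattern (taken from the v1f32 rint test
in the diff below), the intrinsic operand changes from an immediate
constant to a value loaded through a pointer argument, so the call can no
longer be folded away and code generation is always exercised:

  ; Before: the operand is a literal, a candidate for constant folding.
  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
            <1 x float> <float 42.0>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0

  ; After: the operand is loaded from memory, so lowering of the
  ; strict-FP intrinsic itself is what gets tested.
  %b = load <1 x float>, ptr %a
  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
            <1 x float> %b,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0

On AVX targets the loaded form still selects the expected rounding
instructions: vroundss/vroundpd with immediate $4 for rint (use the MXCSR
rounding mode) and $12 for nearbyint (MXCSR mode plus suppressed
precision exception).
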
Added:
Modified:
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 21dfdc3c2abe49..49062eaef31887 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -3011,25 +3011,26 @@ entry:
ret <4 x double> %log2
}
-define <1 x float> @constrained_vector_rint_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_rint_v1f32:
+define <1 x float> @constrained_vector_rint_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_rint_v1f32_var:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq rintf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_rint_v1f32:
+; AVX-LABEL: constrained_vector_rint_v1f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
- <1 x float> <float 42.0>,
+ <1 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <1 x float> %rint
@@ -3063,42 +3064,77 @@ entry:
ret <2 x double> %rint
}
-define <3 x float> @constrained_vector_rint_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_rint_v3f32:
+define <2 x double> @constrained_vector_rint_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_rint_v2f64_var:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_rint_v2f64_var:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vroundpd $4, (%rdi), %xmm0
+; AVX-NEXT: retq
+entry:
+ %b = load <2 x double>, ptr %a
+ %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+ <2 x double> %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %rint
+}
+
+define <3 x float> @constrained_vector_rint_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_rint_v3f32_var:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq rintf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq rintf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq rintf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_rint_v3f32:
+; AVX-LABEL: constrained_vector_rint_v3f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
- <3 x float> <float 42.0, float 43.0, float 44.0>,
+ <3 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <3 x float> %rint
@@ -3143,6 +3179,51 @@ entry:
ret <3 x double> %rint
}
+define <3 x double> @constrained_vector_rint_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_rint_v3f64_var:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_rint_v3f64_var:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vroundpd $4, (%rdi), %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: retq
+entry:
+ %b = load <3 x double>, ptr %a
+ %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
+ <3 x double> %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %rint
+}
+
define <4 x double> @constrained_vector_rint_v4f64() #0 {
; CHECK-LABEL: constrained_vector_rint_v4f64:
; CHECK: # %bb.0: # %entry
@@ -3182,25 +3263,70 @@ entry:
ret <4 x double> %rint
}
-define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_nearbyint_v1f32:
+define <4 x double> @constrained_vector_rint_v4f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_rint_v4f64_var:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm1
+; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps 16(%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_rint_v4f64_var:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vroundpd $4, (%rdi), %ymm0
+; AVX-NEXT: retq
+entry:
+ %b = load <4 x double>, ptr %a
+ %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
+ <4 x double> %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %rint
+}
+
+define <1 x float> @constrained_vector_nearbyint_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_nearbyint_v1f32_var:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq nearbyintf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_nearbyint_v1f32:
+; AVX-LABEL: constrained_vector_nearbyint_v1f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
- <1 x float> <float 42.0>,
+ <1 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <1 x float> %nearby
@@ -3234,42 +3360,77 @@ entry:
ret <2 x double> %nearby
}
-define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_nearbyint_v3f32:
+define <2 x double> @constrained_vector_nearbyint_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_nearbyint_v2f64_var:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_nearbyint_v2f64_var:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vroundpd $12, (%rdi), %xmm0
+; AVX-NEXT: retq
+entry:
+ %b = load <2 x double>, ptr %a
+ %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+ <2 x double> %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %nearby
+}
+
+define <3 x float> @constrained_vector_nearbyint_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_nearbyint_v3f32_var:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq nearbyintf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq nearbyintf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq nearbyintf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_nearbyint_v3f32:
+; AVX-LABEL: constrained_vector_nearbyint_v3f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $12, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
- <3 x float> <float 42.0, float 43.0, float 44.0>,
+ <3 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <3 x float> %nearby
@@ -3314,6 +3475,51 @@ entry:
ret <3 x double> %nearby
}
+define <3 x double> @constrained_vector_nearbyint_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_nearbyint_v3f64_var:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_nearbyint_v3f64_var:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vroundpd $12, (%rdi), %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: retq
+entry:
+ %b = load <3 x double>, ptr %a
+ %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
+ <3 x double> %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %nearby
+}
+
define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v4f64:
; CHECK: # %bb.0: # %entry
@@ -3353,6 +3559,50 @@ entry:
ret <4 x double> %nearby
}
+define <4 x double> @constrained_vector_nearbyint_v4f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_nearbyint_v4f64_var:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm1
+; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps 16(%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_nearbyint_v4f64_var:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vroundpd $12, (%rdi), %ymm0
+; AVX-NEXT: retq
+entry:
+ %b = load <4 x double>, ptr %a
+ %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
+ <4 x double> %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %nearby
+}
+
define <1 x float> @constrained_vector_maxnum_v1f32() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v1f32:
; CHECK: # %bb.0: # %entry
@@ -4482,10 +4732,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 {
; CHECK-NEXT: movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: comiss %xmm0, %xmm2
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ja .LBB115_2
+; CHECK-NEXT: ja .LBB121_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movaps %xmm2, %xmm1
-; CHECK-NEXT: .LBB115_2: # %entry
+; CHECK-NEXT: .LBB121_2: # %entry
; CHECK-NEXT: subss %xmm1, %xmm0
; CHECK-NEXT: cvttss2si %xmm0, %rcx
; CHECK-NEXT: setbe %al
@@ -4500,10 +4750,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 {
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vcomiss %xmm0, %xmm1
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: ja .LBB115_2
+; AVX1-NEXT: ja .LBB121_2
; AVX1-NEXT: # %bb.1: # %entry
; AVX1-NEXT: vmovaps %xmm1, %xmm2
-; AVX1-NEXT: .LBB115_2: # %entry
+; AVX1-NEXT: .LBB121_2: # %entry
; AVX1-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
; AVX1-NEXT: setbe %al
@@ -4531,10 +4781,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
; CHECK-NEXT: comiss %xmm2, %xmm1
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: xorps %xmm3, %xmm3
-; CHECK-NEXT: ja .LBB116_2
+; CHECK-NEXT: ja .LBB122_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movaps %xmm1, %xmm3
-; CHECK-NEXT: .LBB116_2: # %entry
+; CHECK-NEXT: .LBB122_2: # %entry
; CHECK-NEXT: subss %xmm3, %xmm2
; CHECK-NEXT: cvttss2si %xmm2, %rax
; CHECK-NEXT: setbe %cl
@@ -4544,10 +4794,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
; CHECK-NEXT: movq %rcx, %xmm2
; CHECK-NEXT: movss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: comiss %xmm3, %xmm1
-; CHECK-NEXT: ja .LBB116_4
+; CHECK-NEXT: ja .LBB122_4
; CHECK-NEXT: # %bb.3: # %entry
; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: .LBB116_4: # %entry
+; CHECK-NEXT: .LBB122_4: # %entry
; CHECK-NEXT: subss %xmm0, %xmm3
; CHECK-NEXT: cvttss2si %xmm3, %rax
; CHECK-NEXT: setbe %cl
@@ -4565,10 +4815,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
; AVX1-NEXT: vcomiss %xmm2, %xmm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: ja .LBB116_2
+; AVX1-NEXT: ja .LBB122_2
; AVX1-NEXT: # %bb.1: # %entry
; AVX1-NEXT: vmovaps %xmm0, %xmm3
-; AVX1-NEXT: .LBB116_2: # %entry
+; AVX1-NEXT: .LBB122_2: # %entry
; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttss2si %xmm2, %rax
; AVX1-NEXT: setbe %cl
@@ -4578,10 +4828,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
; AVX1-NEXT: vmovq %rcx, %xmm2
; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vcomiss %xmm3, %xmm0
-; AVX1-NEXT: ja .LBB116_4
+; AVX1-NEXT: ja .LBB122_4
; AVX1-NEXT: # %bb.3: # %entry
; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: .LBB116_4: # %entry
+; AVX1-NEXT: .LBB122_4: # %entry
; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttss2si %xmm0, %rax
; AVX1-NEXT: setbe %cl
@@ -4622,10 +4872,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; CHECK-NEXT: comiss %xmm2, %xmm1
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: xorps %xmm3, %xmm3
-; CHECK-NEXT: ja .LBB117_2
+; CHECK-NEXT: ja .LBB123_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movaps %xmm1, %xmm3
-; CHECK-NEXT: .LBB117_2: # %entry
+; CHECK-NEXT: .LBB123_2: # %entry
; CHECK-NEXT: subss %xmm3, %xmm2
; CHECK-NEXT: cvttss2si %xmm2, %rcx
; CHECK-NEXT: setbe %al
@@ -4635,10 +4885,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; CHECK-NEXT: movss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: comiss %xmm2, %xmm1
; CHECK-NEXT: xorps %xmm3, %xmm3
-; CHECK-NEXT: ja .LBB117_4
+; CHECK-NEXT: ja .LBB123_4
; CHECK-NEXT: # %bb.3: # %entry
; CHECK-NEXT: movaps %xmm1, %xmm3
-; CHECK-NEXT: .LBB117_4: # %entry
+; CHECK-NEXT: .LBB123_4: # %entry
; CHECK-NEXT: subss %xmm3, %xmm2
; CHECK-NEXT: cvttss2si %xmm2, %rcx
; CHECK-NEXT: setbe %dl
@@ -4647,10 +4897,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; CHECK-NEXT: xorq %rcx, %rdx
; CHECK-NEXT: movss {{.*#+}} xmm2 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: comiss %xmm2, %xmm1
-; CHECK-NEXT: ja .LBB117_6
+; CHECK-NEXT: ja .LBB123_6
; CHECK-NEXT: # %bb.5: # %entry
; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: .LBB117_6: # %entry
+; CHECK-NEXT: .LBB123_6: # %entry
; CHECK-NEXT: subss %xmm0, %xmm2
; CHECK-NEXT: cvttss2si %xmm2, %rsi
; CHECK-NEXT: setbe %cl
@@ -4666,10 +4916,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; AVX1-NEXT: vcomiss %xmm2, %xmm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: ja .LBB117_2
+; AVX1-NEXT: ja .LBB123_2
; AVX1-NEXT: # %bb.1: # %entry
; AVX1-NEXT: vmovaps %xmm0, %xmm3
-; AVX1-NEXT: .LBB117_2: # %entry
+; AVX1-NEXT: .LBB123_2: # %entry
; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttss2si %xmm2, %rax
; AVX1-NEXT: setbe %cl
@@ -4680,10 +4930,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vcomiss %xmm3, %xmm0
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: ja .LBB117_4
+; AVX1-NEXT: ja .LBB123_4
; AVX1-NEXT: # %bb.3: # %entry
; AVX1-NEXT: vmovaps %xmm0, %xmm4
-; AVX1-NEXT: .LBB117_4: # %entry
+; AVX1-NEXT: .LBB123_4: # %entry
; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: setbe %cl
@@ -4694,10 +4944,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vcomiss %xmm3, %xmm0
-; AVX1-NEXT: ja .LBB117_6
+; AVX1-NEXT: ja .LBB123_6
; AVX1-NEXT: # %bb.5: # %entry
; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: .LBB117_6: # %entry
+; AVX1-NEXT: .LBB123_6: # %entry
; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttss2si %xmm0, %rax
; AVX1-NEXT: setbe %cl
@@ -4735,10 +4985,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; CHECK-NEXT: comiss %xmm0, %xmm2
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: xorps %xmm3, %xmm3
-; CHECK-NEXT: ja .LBB118_2
+; CHECK-NEXT: ja .LBB124_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movaps %xmm2, %xmm3
-; CHECK-NEXT: .LBB118_2: # %entry
+; CHECK-NEXT: .LBB124_2: # %entry
; CHECK-NEXT: subss %xmm3, %xmm0
; CHECK-NEXT: cvttss2si %xmm0, %rcx
; CHECK-NEXT: setbe %al
@@ -4748,10 +4998,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: comiss %xmm0, %xmm2
; CHECK-NEXT: xorps %xmm4, %xmm4
-; CHECK-NEXT: ja .LBB118_4
+; CHECK-NEXT: ja .LBB124_4
; CHECK-NEXT: # %bb.3: # %entry
; CHECK-NEXT: movaps %xmm2, %xmm4
-; CHECK-NEXT: .LBB118_4: # %entry
+; CHECK-NEXT: .LBB124_4: # %entry
; CHECK-NEXT: movq %rax, %xmm3
; CHECK-NEXT: subss %xmm4, %xmm0
; CHECK-NEXT: cvttss2si %xmm0, %rax
@@ -4763,10 +5013,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; CHECK-NEXT: movss {{.*#+}} xmm4 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: comiss %xmm4, %xmm2
; CHECK-NEXT: xorps %xmm5, %xmm5
-; CHECK-NEXT: ja .LBB118_6
+; CHECK-NEXT: ja .LBB124_6
; CHECK-NEXT: # %bb.5: # %entry
; CHECK-NEXT: movaps %xmm2, %xmm5
-; CHECK-NEXT: .LBB118_6: # %entry
+; CHECK-NEXT: .LBB124_6: # %entry
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; CHECK-NEXT: subss %xmm5, %xmm4
; CHECK-NEXT: cvttss2si %xmm4, %rax
@@ -4777,10 +5027,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; CHECK-NEXT: movq %rcx, %xmm3
; CHECK-NEXT: movss {{.*#+}} xmm4 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: comiss %xmm4, %xmm2
-; CHECK-NEXT: ja .LBB118_8
+; CHECK-NEXT: ja .LBB124_8
; CHECK-NEXT: # %bb.7: # %entry
; CHECK-NEXT: movaps %xmm2, %xmm1
-; CHECK-NEXT: .LBB118_8: # %entry
+; CHECK-NEXT: .LBB124_8: # %entry
; CHECK-NEXT: subss %xmm1, %xmm4
; CHECK-NEXT: cvttss2si %xmm4, %rax
; CHECK-NEXT: setbe %cl
@@ -4798,10 +5048,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; AVX1-NEXT: vcomiss %xmm2, %xmm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: ja .LBB118_2
+; AVX1-NEXT: ja .LBB124_2
; AVX1-NEXT: # %bb.1: # %entry
; AVX1-NEXT: vmovaps %xmm0, %xmm3
-; AVX1-NEXT: .LBB118_2: # %entry
+; AVX1-NEXT: .LBB124_2: # %entry
; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttss2si %xmm2, %rcx
; AVX1-NEXT: setbe %al
@@ -4811,10 +5061,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vcomiss %xmm3, %xmm0
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: ja .LBB118_4
+; AVX1-NEXT: ja .LBB124_4
; AVX1-NEXT: # %bb.3: # %entry
; AVX1-NEXT: vmovaps %xmm0, %xmm4
-; AVX1-NEXT: .LBB118_4: # %entry
+; AVX1-NEXT: .LBB124_4: # %entry
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
@@ -4826,10 +5076,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; AVX1-NEXT: vmovss {{.*#+}} xmm4 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vcomiss %xmm4, %xmm0
; AVX1-NEXT: vxorps %xmm5, %xmm5, %xmm5
-; AVX1-NEXT: ja .LBB118_6
+; AVX1-NEXT: ja .LBB124_6
; AVX1-NEXT: # %bb.5: # %entry
; AVX1-NEXT: vmovaps %xmm0, %xmm5
-; AVX1-NEXT: .LBB118_6: # %entry
+; AVX1-NEXT: .LBB124_6: # %entry
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vsubss %xmm5, %xmm4, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
@@ -4840,10 +5090,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; AVX1-NEXT: vmovq %rcx, %xmm3
; AVX1-NEXT: vmovss {{.*#+}} xmm4 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vcomiss %xmm4, %xmm0
-; AVX1-NEXT: ja .LBB118_8
+; AVX1-NEXT: ja .LBB124_8
; AVX1-NEXT: # %bb.7: # %entry
; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: .LBB118_8: # %entry
+; AVX1-NEXT: .LBB124_8: # %entry
; AVX1-NEXT: vsubss %xmm1, %xmm4, %xmm0
; AVX1-NEXT: vcvttss2si %xmm0, %rax
; AVX1-NEXT: setbe %cl
@@ -5036,10 +5286,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 {
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
; CHECK-NEXT: comisd %xmm0, %xmm2
; CHECK-NEXT: xorpd %xmm1, %xmm1
-; CHECK-NEXT: ja .LBB123_2
+; CHECK-NEXT: ja .LBB129_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movapd %xmm2, %xmm1
-; CHECK-NEXT: .LBB123_2: # %entry
+; CHECK-NEXT: .LBB129_2: # %entry
; CHECK-NEXT: subsd %xmm1, %xmm0
; CHECK-NEXT: cvttsd2si %xmm0, %rcx
; CHECK-NEXT: setbe %al
@@ -5054,10 +5304,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 {
; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
; AVX1-NEXT: vcomisd %xmm0, %xmm1
; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: ja .LBB123_2
+; AVX1-NEXT: ja .LBB129_2
; AVX1-NEXT: # %bb.1: # %entry
; AVX1-NEXT: vmovapd %xmm1, %xmm2
-; AVX1-NEXT: .LBB123_2: # %entry
+; AVX1-NEXT: .LBB129_2: # %entry
; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
; AVX1-NEXT: setbe %al
@@ -5085,10 +5335,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
; CHECK-NEXT: comisd %xmm2, %xmm1
; CHECK-NEXT: xorpd %xmm0, %xmm0
; CHECK-NEXT: xorpd %xmm3, %xmm3
-; CHECK-NEXT: ja .LBB124_2
+; CHECK-NEXT: ja .LBB130_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movapd %xmm1, %xmm3
-; CHECK-NEXT: .LBB124_2: # %entry
+; CHECK-NEXT: .LBB130_2: # %entry
; CHECK-NEXT: subsd %xmm3, %xmm2
; CHECK-NEXT: cvttsd2si %xmm2, %rax
; CHECK-NEXT: setbe %cl
@@ -5098,10 +5348,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
; CHECK-NEXT: movq %rcx, %xmm2
; CHECK-NEXT: movsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: comisd %xmm3, %xmm1
-; CHECK-NEXT: ja .LBB124_4
+; CHECK-NEXT: ja .LBB130_4
; CHECK-NEXT: # %bb.3: # %entry
; CHECK-NEXT: movapd %xmm1, %xmm0
-; CHECK-NEXT: .LBB124_4: # %entry
+; CHECK-NEXT: .LBB130_4: # %entry
; CHECK-NEXT: subsd %xmm0, %xmm3
; CHECK-NEXT: cvttsd2si %xmm3, %rax
; CHECK-NEXT: setbe %cl
@@ -5119,10 +5369,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
; AVX1-NEXT: vcomisd %xmm2, %xmm0
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: ja .LBB124_2
+; AVX1-NEXT: ja .LBB130_2
; AVX1-NEXT: # %bb.1: # %entry
; AVX1-NEXT: vmovapd %xmm0, %xmm3
-; AVX1-NEXT: .LBB124_2: # %entry
+; AVX1-NEXT: .LBB130_2: # %entry
; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttsd2si %xmm2, %rax
; AVX1-NEXT: setbe %cl
@@ -5132,10 +5382,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
; AVX1-NEXT: vmovq %rcx, %xmm2
; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0]
; AVX1-NEXT: vcomisd %xmm3, %xmm0
-; AVX1-NEXT: ja .LBB124_4
+; AVX1-NEXT: ja .LBB130_4
; AVX1-NEXT: # %bb.3: # %entry
; AVX1-NEXT: vmovapd %xmm0, %xmm1
-; AVX1-NEXT: .LBB124_4: # %entry
+; AVX1-NEXT: .LBB130_4: # %entry
; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
; AVX1-NEXT: setbe %cl
@@ -5177,10 +5427,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; CHECK-NEXT: comisd %xmm2, %xmm1
; CHECK-NEXT: xorpd %xmm0, %xmm0
; CHECK-NEXT: xorpd %xmm3, %xmm3
-; CHECK-NEXT: ja .LBB125_2
+; CHECK-NEXT: ja .LBB131_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movapd %xmm1, %xmm3
-; CHECK-NEXT: .LBB125_2: # %entry
+; CHECK-NEXT: .LBB131_2: # %entry
; CHECK-NEXT: subsd %xmm3, %xmm2
; CHECK-NEXT: cvttsd2si %xmm2, %rcx
; CHECK-NEXT: setbe %al
@@ -5190,10 +5440,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT: comisd %xmm2, %xmm1
; CHECK-NEXT: xorpd %xmm3, %xmm3
-; CHECK-NEXT: ja .LBB125_4
+; CHECK-NEXT: ja .LBB131_4
; CHECK-NEXT: # %bb.3: # %entry
; CHECK-NEXT: movapd %xmm1, %xmm3
-; CHECK-NEXT: .LBB125_4: # %entry
+; CHECK-NEXT: .LBB131_4: # %entry
; CHECK-NEXT: subsd %xmm3, %xmm2
; CHECK-NEXT: cvttsd2si %xmm2, %rcx
; CHECK-NEXT: setbe %dl
@@ -5202,10 +5452,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; CHECK-NEXT: xorq %rcx, %rdx
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT: comisd %xmm2, %xmm1
-; CHECK-NEXT: ja .LBB125_6
+; CHECK-NEXT: ja .LBB131_6
; CHECK-NEXT: # %bb.5: # %entry
; CHECK-NEXT: movapd %xmm1, %xmm0
-; CHECK-NEXT: .LBB125_6: # %entry
+; CHECK-NEXT: .LBB131_6: # %entry
; CHECK-NEXT: subsd %xmm0, %xmm2
; CHECK-NEXT: cvttsd2si %xmm2, %rsi
; CHECK-NEXT: setbe %cl
@@ -5221,10 +5471,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; AVX1-NEXT: vcomisd %xmm2, %xmm0
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: ja .LBB125_2
+; AVX1-NEXT: ja .LBB131_2
; AVX1-NEXT: # %bb.1: # %entry
; AVX1-NEXT: vmovapd %xmm0, %xmm3
-; AVX1-NEXT: .LBB125_2: # %entry
+; AVX1-NEXT: .LBB131_2: # %entry
; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttsd2si %xmm2, %rax
; AVX1-NEXT: setbe %cl
@@ -5235,10 +5485,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0]
; AVX1-NEXT: vcomisd %xmm3, %xmm0
; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: ja .LBB125_4
+; AVX1-NEXT: ja .LBB131_4
; AVX1-NEXT: # %bb.3: # %entry
; AVX1-NEXT: vmovapd %xmm0, %xmm4
-; AVX1-NEXT: .LBB125_4: # %entry
+; AVX1-NEXT: .LBB131_4: # %entry
; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vcvttsd2si %xmm3, %rax
; AVX1-NEXT: setbe %cl
@@ -5249,10 +5499,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0]
; AVX1-NEXT: vcomisd %xmm3, %xmm0
-; AVX1-NEXT: ja .LBB125_6
+; AVX1-NEXT: ja .LBB131_6
; AVX1-NEXT: # %bb.5: # %entry
; AVX1-NEXT: vmovapd %xmm0, %xmm1
-; AVX1-NEXT: .LBB125_6: # %entry
+; AVX1-NEXT: .LBB131_6: # %entry
; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
; AVX1-NEXT: setbe %cl
@@ -5290,10 +5540,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; CHECK-NEXT: comisd %xmm0, %xmm2
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: xorpd %xmm3, %xmm3
-; CHECK-NEXT: ja .LBB126_2
+; CHECK-NEXT: ja .LBB132_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movapd %xmm2, %xmm3
-; CHECK-NEXT: .LBB126_2: # %entry
+; CHECK-NEXT: .LBB132_2: # %entry
; CHECK-NEXT: subsd %xmm3, %xmm0
; CHECK-NEXT: cvttsd2si %xmm0, %rcx
; CHECK-NEXT: setbe %al
@@ -5303,10 +5553,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: comisd %xmm0, %xmm2
; CHECK-NEXT: xorpd %xmm4, %xmm4
-; CHECK-NEXT: ja .LBB126_4
+; CHECK-NEXT: ja .LBB132_4
; CHECK-NEXT: # %bb.3: # %entry
; CHECK-NEXT: movapd %xmm2, %xmm4
-; CHECK-NEXT: .LBB126_4: # %entry
+; CHECK-NEXT: .LBB132_4: # %entry
; CHECK-NEXT: movq %rax, %xmm3
; CHECK-NEXT: subsd %xmm4, %xmm0
; CHECK-NEXT: cvttsd2si %xmm0, %rax
@@ -5318,10 +5568,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; CHECK-NEXT: movsd {{.*#+}} xmm4 = [4.2399999999999999E+1,0.0E+0]
; CHECK-NEXT: comisd %xmm4, %xmm2
; CHECK-NEXT: xorpd %xmm5, %xmm5
-; CHECK-NEXT: ja .LBB126_6
+; CHECK-NEXT: ja .LBB132_6
; CHECK-NEXT: # %bb.5: # %entry
; CHECK-NEXT: movapd %xmm2, %xmm5
-; CHECK-NEXT: .LBB126_6: # %entry
+; CHECK-NEXT: .LBB132_6: # %entry
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; CHECK-NEXT: subsd %xmm5, %xmm4
; CHECK-NEXT: cvttsd2si %xmm4, %rax
@@ -5332,10 +5582,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; CHECK-NEXT: movq %rcx, %xmm3
; CHECK-NEXT: movsd {{.*#+}} xmm4 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT: comisd %xmm4, %xmm2
-; CHECK-NEXT: ja .LBB126_8
+; CHECK-NEXT: ja .LBB132_8
; CHECK-NEXT: # %bb.7: # %entry
; CHECK-NEXT: movapd %xmm2, %xmm1
-; CHECK-NEXT: .LBB126_8: # %entry
+; CHECK-NEXT: .LBB132_8: # %entry
; CHECK-NEXT: subsd %xmm1, %xmm4
; CHECK-NEXT: cvttsd2si %xmm4, %rax
; CHECK-NEXT: setbe %cl
@@ -5353,10 +5603,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; AVX1-NEXT: vcomisd %xmm2, %xmm0
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: ja .LBB126_2
+; AVX1-NEXT: ja .LBB132_2
; AVX1-NEXT: # %bb.1: # %entry
; AVX1-NEXT: vmovapd %xmm0, %xmm3
-; AVX1-NEXT: .LBB126_2: # %entry
+; AVX1-NEXT: .LBB132_2: # %entry
; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
; AVX1-NEXT: setbe %al
@@ -5366,10 +5616,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0]
; AVX1-NEXT: vcomisd %xmm3, %xmm0
; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: ja .LBB126_4
+; AVX1-NEXT: ja .LBB132_4
; AVX1-NEXT: # %bb.3: # %entry
; AVX1-NEXT: vmovapd %xmm0, %xmm4
-; AVX1-NEXT: .LBB126_4: # %entry
+; AVX1-NEXT: .LBB132_4: # %entry
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vcvttsd2si %xmm3, %rax
@@ -5381,10 +5631,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = [4.2200000000000003E+1,0.0E+0]
; AVX1-NEXT: vcomisd %xmm4, %xmm0
; AVX1-NEXT: vxorpd %xmm5, %xmm5, %xmm5
-; AVX1-NEXT: ja .LBB126_6
+; AVX1-NEXT: ja .LBB132_6
; AVX1-NEXT: # %bb.5: # %entry
; AVX1-NEXT: vmovapd %xmm0, %xmm5
-; AVX1-NEXT: .LBB126_6: # %entry
+; AVX1-NEXT: .LBB132_6: # %entry
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vsubsd %xmm5, %xmm4, %xmm3
; AVX1-NEXT: vcvttsd2si %xmm3, %rax
@@ -5395,10 +5645,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; AVX1-NEXT: vmovq %rcx, %xmm3
; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = [4.2100000000000001E+1,0.0E+0]
; AVX1-NEXT: vcomisd %xmm4, %xmm0
-; AVX1-NEXT: ja .LBB126_8
+; AVX1-NEXT: ja .LBB132_8
; AVX1-NEXT: # %bb.7: # %entry
; AVX1-NEXT: vmovapd %xmm0, %xmm1
-; AVX1-NEXT: .LBB126_8: # %entry
+; AVX1-NEXT: .LBB132_8: # %entry
; AVX1-NEXT: vsubsd %xmm1, %xmm4, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
; AVX1-NEXT: setbe %cl
@@ -5620,108 +5870,121 @@ entry:
ret <4 x double> %result
}
-define <1 x float> @constrained_vector_ceil_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_ceil_v1f32:
+define <1 x float> @constrained_vector_ceil_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_ceil_v1f32_var:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq ceilf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_ceil_v1f32:
+; AVX-LABEL: constrained_vector_ceil_v1f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
- <1 x float> <float 1.5>,
+ <1 x float> %b,
metadata !"fpexcept.strict") #0
ret <1 x float> %ceil
}
-define <2 x double> @constrained_vector_ceil_v2f64() #0 {
-; CHECK-LABEL: constrained_vector_ceil_v2f64:
+define <2 x double> @constrained_vector_ceil_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_ceil_v2f64_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT: callq ceil@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
; CHECK-NEXT: callq ceil@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq ceil@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_ceil_v2f64:
+; AVX-LABEL: constrained_vector_ceil_v2f64_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $10, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT: vroundpd $10, (%rdi), %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
- <2 x double> <double 1.1, double 1.9>,
+ <2 x double> %b,
metadata !"fpexcept.strict") #0
ret <2 x double> %ceil
}
-define <3 x float> @constrained_vector_ceil_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_ceil_v3f32:
+define <3 x float> @constrained_vector_ceil_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_ceil_v3f32_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq ceilf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq ceilf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq ceilf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_ceil_v3f32:
+; AVX-LABEL: constrained_vector_ceil_v3f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $10, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $10, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
- <3 x float> <float 1.5, float 2.5, float 3.5>,
+ <3 x float> %b,
metadata !"fpexcept.strict") #0
ret <3 x float> %ceil
}
-define <3 x double> @constrained_vector_ceil_v3f64() #0 {
-; CHECK-LABEL: constrained_vector_ceil_v3f64:
+define <3 x double> @constrained_vector_ceil_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_ceil_v3f64_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq ceil@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq ceil@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq ceil@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -5730,127 +5993,141 @@ define <3 x double> @constrained_vector_ceil_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_ceil_v3f64:
+; AVX-LABEL: constrained_vector_ceil_v3f64_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $10, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT: vroundpd $10, (%rdi), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
- <3 x double> <double 1.1, double 1.9, double 1.5>,
+ <3 x double> %b,
metadata !"fpexcept.strict") #0
ret <3 x double> %ceil
}
-define <1 x float> @constrained_vector_floor_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_floor_v1f32:
+define <1 x float> @constrained_vector_floor_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_floor_v1f32_var:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq floorf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_floor_v1f32:
+; AVX-LABEL: constrained_vector_floor_v1f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
- <1 x float> <float 1.5>,
+ <1 x float> %b,
metadata !"fpexcept.strict") #0
ret <1 x float> %floor
}
-define <2 x double> @constrained_vector_floor_v2f64() #0 {
-; CHECK-LABEL: constrained_vector_floor_v2f64:
+define <2 x double> @constrained_vector_floor_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_floor_v2f64_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT: callq floor@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
; CHECK-NEXT: callq floor@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq floor@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_floor_v2f64:
+; AVX-LABEL: constrained_vector_floor_v2f64_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $9, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT: vroundpd $9, (%rdi), %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
- <2 x double> <double 1.1, double 1.9>,
+ <2 x double> %b,
metadata !"fpexcept.strict") #0
ret <2 x double> %floor
}
-define <3 x float> @constrained_vector_floor_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_floor_v3f32:
+define <3 x float> @constrained_vector_floor_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_floor_v3f32_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq floorf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq floorf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq floorf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_floor_v3f32:
+; AVX-LABEL: constrained_vector_floor_v3f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $9, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $9, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
- <3 x float> <float 1.5, float 2.5, float 3.5>,
+ <3 x float> %b,
metadata !"fpexcept.strict") #0
ret <3 x float> %floor
}
-define <3 x double> @constrained_vector_floor_v3f64() #0 {
-; CHECK-LABEL: constrained_vector_floor_v3f64:
+define <3 x double> @constrained_vector_floor_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_floor_v3f64_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq floor@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq floor@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq floor@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -5859,149 +6136,175 @@ define <3 x double> @constrained_vector_floor_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_floor_v3f64:
+; AVX-LABEL: constrained_vector_floor_v3f64_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $9, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT: vroundpd $9, (%rdi), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
- <3 x double> <double 1.1, double 1.9, double 1.5>,
+ <3 x double> %b,
metadata !"fpexcept.strict") #0
ret <3 x double> %floor
}
-define <1 x float> @constrained_vector_round_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_round_v1f32:
+define <1 x float> @constrained_vector_round_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_round_v1f32_var:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq roundf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_round_v1f32:
+; AVX-LABEL: constrained_vector_round_v1f32_var:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq roundf@PLT
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
- <1 x float> <float 1.5>,
+ <1 x float> %b,
metadata !"fpexcept.strict") #0
ret <1 x float> %round
}
-define <2 x double> @constrained_vector_round_v2f64() #0 {
-; CHECK-LABEL: constrained_vector_round_v2f64:
+define <2 x double> @constrained_vector_round_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_round_v2f64_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT: callq round@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
; CHECK-NEXT: callq round@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq round@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_round_v2f64:
+; AVX-LABEL: constrained_vector_round_v2f64_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: subq $24, %rsp
-; AVX-NEXT: .cfi_def_cfa_offset 32
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq round@PLT
-; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; AVX-NEXT: # xmm0 = mem[0],zero
; AVX-NEXT: callq round@PLT
-; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
- <2 x double> <double 1.1, double 1.9>,
+ <2 x double> %b,
metadata !"fpexcept.strict") #0
ret <2 x double> %round
}
-define <3 x float> @constrained_vector_round_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_round_v3f32:
+define <3 x float> @constrained_vector_round_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_round_v3f32_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq roundf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq roundf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq roundf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_round_v3f32:
+; AVX-LABEL: constrained_vector_round_v3f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: subq $40, %rsp
-; AVX-NEXT: .cfi_def_cfa_offset 48
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: subq $48, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 64
+; AVX-NEXT: .cfi_offset %rbx, -16
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq roundf@PLT
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; AVX-NEXT: callq roundf@PLT
-; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq roundf@PLT
-; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: addq $48, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: popq %rbx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
- <3 x float> <float 1.5, float 2.5, float 3.5>,
+ <3 x float> %b,
metadata !"fpexcept.strict") #0
ret <3 x float> %round
}
-define <3 x double> @constrained_vector_round_v3f64() #0 {
-; CHECK-LABEL: constrained_vector_round_v3f64:
+define <3 x double> @constrained_vector_round_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_round_v3f64_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq round@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq round@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq round@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -6010,139 +6313,162 @@ define <3 x double> @constrained_vector_round_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_round_v3f64:
+; AVX-LABEL: constrained_vector_round_v3f64_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: subq $40, %rsp
-; AVX-NEXT: .cfi_def_cfa_offset 48
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: subq $48, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 64
+; AVX-NEXT: .cfi_offset %rbx, -16
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq round@PLT
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; AVX-NEXT: # xmm0 = mem[0],zero
; AVX-NEXT: callq round@PLT
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq round@PLT
-; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: addq $48, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: popq %rbx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
- <3 x double> <double 1.1, double 1.9, double 1.5>,
+ <3 x double> %b,
metadata !"fpexcept.strict") #0
ret <3 x double> %round
}
-define <1 x float> @constrained_vector_trunc_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_trunc_v1f32:
+define <1 x float> @constrained_vector_trunc_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_trunc_v1f32_var:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq truncf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_trunc_v1f32:
+; AVX-LABEL: constrained_vector_trunc_v1f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
- <1 x float> <float 1.5>,
+ <1 x float> %b,
metadata !"fpexcept.strict") #0
ret <1 x float> %trunc
}
-define <2 x double> @constrained_vector_trunc_v2f64() #0 {
-; CHECK-LABEL: constrained_vector_trunc_v2f64:
+define <2 x double> @constrained_vector_trunc_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_trunc_v2f64_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT: callq trunc@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
; CHECK-NEXT: callq trunc@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq trunc@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_trunc_v2f64:
+; AVX-LABEL: constrained_vector_trunc_v2f64_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $11, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT: vroundpd $11, (%rdi), %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
- <2 x double> <double 1.1, double 1.9>,
+ <2 x double> %b,
metadata !"fpexcept.strict") #0
ret <2 x double> %trunc
}
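
The ROUND* immediates used in the AVX checks decode as follows (imm8 bits 1:0 select the rounding mode, bit 2 takes the mode from MXCSR instead, bit 3 suppresses the inexact exception):

; $4  = 0b0100  dynamic (MXCSR) rounding, inexact reported   -> rint
; $9  = 0b1001  round toward -infinity, inexact suppressed   -> floor
; $11 = 0b1011  round toward zero, inexact suppressed        -> trunc
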
-define <3 x float> @constrained_vector_trunc_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_trunc_v3f32:
+define <3 x float> @constrained_vector_trunc_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_trunc_v3f32_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq truncf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq truncf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq truncf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_trunc_v3f32:
+; AVX-LABEL: constrained_vector_trunc_v3f32_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $11, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $11, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
- <3 x float> <float 1.5, float 2.5, float 3.5>,
+ <3 x float> %b,
metadata !"fpexcept.strict") #0
ret <3 x float> %trunc
}
-define <3 x double> @constrained_vector_trunc_v3f64() #0 {
-; CHECK-LABEL: constrained_vector_trunc_v3f64:
+define <3 x double> @constrained_vector_trunc_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_trunc_v3f64_var:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq trunc@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq trunc@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq trunc@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -6151,20 +6477,21 @@ define <3 x double> @constrained_vector_trunc_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_trunc_v3f64:
+; AVX-LABEL: constrained_vector_trunc_v3f64_var:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $11, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT: vroundpd $11, (%rdi), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
- <3 x double> <double 1.1, double 1.9, double 1.5>,
+ <3 x double> %b,
metadata !"fpexcept.strict") #0
ret <3 x double> %trunc
}
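
The remaining hunks only renumber basic-block labels: the new _var functions shift the index of every later function in the file, so the labels FileCheck expects (.LBB169_2 becoming .LBB175_2, and so on) move in lockstep. The sequences they label are the usual unsigned-to-double expansion; below is a minimal scalar sketch of the algorithm those CHECK lines encode (a hypothetical function, not part of the patch):

define double @u64_to_f64_sketch(i64 %x) {
entry:
  %neg = icmp slt i64 %x, 0        ; MSB set means %x >= 2^63 as unsigned
  br i1 %neg, label %big, label %small

small:                             ; fits in a signed convert
  %s = sitofp i64 %x to double
  ret double %s

big:                               ; halve with a sticky low bit, convert, double
  %half = lshr i64 %x, 1
  %lo   = and i64 %x, 1
  %rnd  = or i64 %half, %lo
  %h    = sitofp i64 %rnd to double
  %d    = fadd double %h, %h
  ret double %d
}
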
@@ -6757,10 +7084,10 @@ define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: cmovnsq %rdi, %rcx
; CHECK-NEXT: cvtsi2sd %rcx, %xmm0
-; CHECK-NEXT: jns .LBB169_2
+; CHECK-NEXT: jns .LBB175_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addsd %xmm0, %xmm0
-; CHECK-NEXT: .LBB169_2: # %entry
+; CHECK-NEXT: .LBB175_2: # %entry
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i64:
@@ -6773,10 +7100,10 @@ define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
; AVX1-NEXT: testq %rdi, %rdi
; AVX1-NEXT: cmovnsq %rdi, %rcx
; AVX1-NEXT: vcvtsi2sd %rcx, %xmm0, %xmm0
-; AVX1-NEXT: jns .LBB169_2
+; AVX1-NEXT: jns .LBB175_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: .LBB169_2: # %entry
+; AVX1-NEXT: .LBB175_2: # %entry
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i64:
@@ -6802,10 +7129,10 @@ define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: cmovnsq %rdi, %rcx
; CHECK-NEXT: cvtsi2ss %rcx, %xmm0
-; CHECK-NEXT: jns .LBB170_2
+; CHECK-NEXT: jns .LBB176_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addss %xmm0, %xmm0
-; CHECK-NEXT: .LBB170_2: # %entry
+; CHECK-NEXT: .LBB176_2: # %entry
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64:
@@ -6818,10 +7145,10 @@ define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
; AVX1-NEXT: testq %rdi, %rdi
; AVX1-NEXT: cmovnsq %rdi, %rcx
; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
-; AVX1-NEXT: jns .LBB170_2
+; AVX1-NEXT: jns .LBB176_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: .LBB170_2: # %entry
+; AVX1-NEXT: .LBB176_2: # %entry
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64:
@@ -6920,10 +7247,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2sd %rdx, %xmm0
-; CHECK-NEXT: jns .LBB173_2
+; CHECK-NEXT: jns .LBB179_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addsd %xmm0, %xmm0
-; CHECK-NEXT: .LBB173_2: # %entry
+; CHECK-NEXT: .LBB179_2: # %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: movq %rax, %rcx
@@ -6935,10 +7262,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2sd %rdx, %xmm1
-; CHECK-NEXT: jns .LBB173_4
+; CHECK-NEXT: jns .LBB179_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addsd %xmm1, %xmm1
-; CHECK-NEXT: .LBB173_4: # %entry
+; CHECK-NEXT: .LBB179_4: # %entry
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
@@ -6953,10 +7280,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1
-; AVX1-NEXT: jns .LBB173_2
+; AVX1-NEXT: jns .LBB179_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddsd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: .LBB173_2: # %entry
+; AVX1-NEXT: .LBB179_2: # %entry
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
@@ -6966,10 +7293,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm0
-; AVX1-NEXT: jns .LBB173_4
+; AVX1-NEXT: jns .LBB179_4
; AVX1-NEXT: # %bb.3:
; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: .LBB173_4: # %entry
+; AVX1-NEXT: .LBB179_4: # %entry
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: retq
;
@@ -7011,10 +7338,10 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2ss %rdx, %xmm0
-; CHECK-NEXT: jns .LBB174_2
+; CHECK-NEXT: jns .LBB180_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addss %xmm0, %xmm0
-; CHECK-NEXT: .LBB174_2: # %entry
+; CHECK-NEXT: .LBB180_2: # %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: movq %rax, %rcx
@@ -7026,10 +7353,10 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
-; CHECK-NEXT: jns .LBB174_4
+; CHECK-NEXT: jns .LBB180_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: .LBB174_4: # %entry
+; CHECK-NEXT: .LBB180_4: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: retq
;
@@ -7177,10 +7504,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: cmovnsq %rdi, %rcx
; CHECK-NEXT: cvtsi2sd %rcx, %xmm0
-; CHECK-NEXT: jns .LBB177_2
+; CHECK-NEXT: jns .LBB183_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addsd %xmm0, %xmm0
-; CHECK-NEXT: .LBB177_2: # %entry
+; CHECK-NEXT: .LBB183_2: # %entry
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: shrq %rax
; CHECK-NEXT: movl %esi, %ecx
@@ -7189,10 +7516,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: cmovnsq %rsi, %rcx
; CHECK-NEXT: cvtsi2sd %rcx, %xmm1
-; CHECK-NEXT: jns .LBB177_4
+; CHECK-NEXT: jns .LBB183_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addsd %xmm1, %xmm1
-; CHECK-NEXT: .LBB177_4: # %entry
+; CHECK-NEXT: .LBB183_4: # %entry
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: shrq %rax
; CHECK-NEXT: movl %edx, %ecx
@@ -7201,10 +7528,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: cmovnsq %rdx, %rcx
; CHECK-NEXT: cvtsi2sd %rcx, %xmm2
-; CHECK-NEXT: jns .LBB177_6
+; CHECK-NEXT: jns .LBB183_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: addsd %xmm2, %xmm2
-; CHECK-NEXT: .LBB177_6: # %entry
+; CHECK-NEXT: .LBB183_6: # %entry
; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
@@ -7221,10 +7548,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1
-; AVX1-NEXT: jns .LBB177_2
+; AVX1-NEXT: jns .LBB183_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddsd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: .LBB177_2: # %entry
+; AVX1-NEXT: .LBB183_2: # %entry
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
@@ -7234,10 +7561,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm2
-; AVX1-NEXT: jns .LBB177_4
+; AVX1-NEXT: jns .LBB183_4
; AVX1-NEXT: # %bb.3:
; AVX1-NEXT: vaddsd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: .LBB177_4: # %entry
+; AVX1-NEXT: .LBB183_4: # %entry
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
@@ -7249,10 +7576,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm3, %xmm0
-; AVX1-NEXT: jns .LBB177_6
+; AVX1-NEXT: jns .LBB183_6
; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: .LBB177_6: # %entry
+; AVX1-NEXT: .LBB183_6: # %entry
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -7287,10 +7614,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: cmovnsq %rsi, %rcx
; CHECK-NEXT: cvtsi2ss %rcx, %xmm1
-; CHECK-NEXT: jns .LBB178_2
+; CHECK-NEXT: jns .LBB184_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: .LBB178_2: # %entry
+; CHECK-NEXT: .LBB184_2: # %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: shrq %rax
; CHECK-NEXT: movl %edi, %ecx
@@ -7299,10 +7626,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: cmovnsq %rdi, %rcx
; CHECK-NEXT: cvtsi2ss %rcx, %xmm0
-; CHECK-NEXT: jns .LBB178_4
+; CHECK-NEXT: jns .LBB184_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addss %xmm0, %xmm0
-; CHECK-NEXT: .LBB178_4: # %entry
+; CHECK-NEXT: .LBB184_4: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: shrq %rax
@@ -7313,10 +7640,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-NEXT: cmovnsq %rdx, %rcx
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2ss %rcx, %xmm1
-; CHECK-NEXT: jns .LBB178_6
+; CHECK-NEXT: jns .LBB184_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: .LBB178_6: # %entry
+; CHECK-NEXT: .LBB184_6: # %entry
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
@@ -7331,10 +7658,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
-; AVX1-NEXT: jns .LBB178_2
+; AVX1-NEXT: jns .LBB184_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: .LBB178_2: # %entry
+; AVX1-NEXT: .LBB184_2: # %entry
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
@@ -7344,10 +7671,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
-; AVX1-NEXT: jns .LBB178_4
+; AVX1-NEXT: jns .LBB184_4
; AVX1-NEXT: # %bb.3:
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: .LBB178_4: # %entry
+; AVX1-NEXT: .LBB184_4: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
@@ -7359,10 +7686,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
-; AVX1-NEXT: jns .LBB178_6
+; AVX1-NEXT: jns .LBB184_6
; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: .LBB178_6: # %entry
+; AVX1-NEXT: .LBB184_6: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -7477,10 +7804,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2sd %rdx, %xmm0
-; CHECK-NEXT: jns .LBB181_2
+; CHECK-NEXT: jns .LBB187_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addsd %xmm0, %xmm0
-; CHECK-NEXT: .LBB181_2: # %entry
+; CHECK-NEXT: .LBB187_2: # %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; CHECK-NEXT: movq %xmm2, %rax
; CHECK-NEXT: movq %rax, %rcx
@@ -7491,10 +7818,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: cvtsi2sd %rdx, %xmm3
-; CHECK-NEXT: jns .LBB181_4
+; CHECK-NEXT: jns .LBB187_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addsd %xmm3, %xmm3
-; CHECK-NEXT: .LBB181_4: # %entry
+; CHECK-NEXT: .LBB187_4: # %entry
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
@@ -7505,10 +7832,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: cvtsi2sd %rdx, %xmm2
-; CHECK-NEXT: jns .LBB181_6
+; CHECK-NEXT: jns .LBB187_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: addsd %xmm2, %xmm2
-; CHECK-NEXT: .LBB181_6: # %entry
+; CHECK-NEXT: .LBB187_6: # %entry
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm1, %rax
@@ -7521,10 +7848,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2sd %rdx, %xmm1
-; CHECK-NEXT: jns .LBB181_8
+; CHECK-NEXT: jns .LBB187_8
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: addsd %xmm1, %xmm1
-; CHECK-NEXT: .LBB181_8: # %entry
+; CHECK-NEXT: .LBB187_8: # %entry
; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; CHECK-NEXT: movapd %xmm2, %xmm1
; CHECK-NEXT: retq
@@ -7601,10 +7928,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: cvtsi2ss %rdx, %xmm2
-; CHECK-NEXT: jns .LBB182_2
+; CHECK-NEXT: jns .LBB188_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addss %xmm2, %xmm2
-; CHECK-NEXT: .LBB182_2: # %entry
+; CHECK-NEXT: .LBB188_2: # %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: movq %rax, %rcx
@@ -7615,10 +7942,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: cvtsi2ss %rdx, %xmm3
-; CHECK-NEXT: jns .LBB182_4
+; CHECK-NEXT: jns .LBB188_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addss %xmm3, %xmm3
-; CHECK-NEXT: .LBB182_4: # %entry
+; CHECK-NEXT: .LBB188_4: # %entry
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
@@ -7629,10 +7956,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
-; CHECK-NEXT: jns .LBB182_6
+; CHECK-NEXT: jns .LBB188_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: .LBB182_6: # %entry
+; CHECK-NEXT: .LBB188_6: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movq %xmm0, %rax
@@ -7645,10 +7972,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2ss %rdx, %xmm0
-; CHECK-NEXT: jns .LBB182_8
+; CHECK-NEXT: jns .LBB188_8
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: addss %xmm0, %xmm0
-; CHECK-NEXT: .LBB182_8: # %entry
+; CHECK-NEXT: .LBB188_8: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: movaps %xmm1, %xmm0