[llvm] [X86] Modify tests for constrained rounding functions (PR #116951)
Serge Pavlov via llvm-commits
llvm-commits@lists.llvm.org
Wed Nov 20 02:40:11 PST 2024
https://github.com/spavloff created https://github.com/llvm/llvm-project/pull/116951
The existing tests for constrained functions often use constant arguments. If constant evaluation is enhanced to fold such calls, these tests will no longer check code generation for the tested functions. To avoid this, the tests are modified to use values loaded from memory instead of constants. For now, only the tests for the rounding functions are changed.
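The change follows one pattern throughout: each test gains a ptr argument, loads its operand from it, and passes the loaded value to the constrained intrinsic. A representative case from the patch (#0 carries the strictfp attribute defined elsewhere in the test file):

    define <1 x float> @constrained_vector_rint_v1f32(ptr %a) #0 {
    entry:
      %b = load <1 x float>, ptr %a
      %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
                  <1 x float> %b,
                  metadata !"round.dynamic",
                  metadata !"fpexcept.strict") #0
      ret <1 x float> %rint
    }

Because %b is no longer a compile-time constant, the call cannot be folded away and the test keeps exercising instruction selection for the intrinsic.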
From e7e83123a0796111d549c8a25eca5d15b8f82e91 Mon Sep 17 00:00:00 2001
From: Serge Pavlov <sepavloff@gmail.com>
Date: Wed, 20 Nov 2024 13:38:18 +0700
Subject: [PATCH] [X86] Modify tests for constrained rounding functions
The existing tests for constrained functions often use constant
arguments. If constant evaluation is enhanced to fold such calls,
these tests will no longer check code generation for the tested
functions. To avoid this, the tests are modified to use values loaded
from memory instead of constants. For now, only the tests for the
rounding functions are changed.
---
.../X86/vector-constrained-fp-intrinsics.ll | 733 ++++++++++--------
1 file changed, 424 insertions(+), 309 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 21dfdc3c2abe49..579b15bbbb0ff8 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -3011,12 +3011,12 @@ entry:
ret <4 x double> %log2
}
-define <1 x float> @constrained_vector_rint_v1f32() #0 {
+define <1 x float> @constrained_vector_rint_v1f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_rint_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq rintf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -3024,98 +3024,111 @@ define <1 x float> @constrained_vector_rint_v1f32() #0 {
;
; AVX-LABEL: constrained_vector_rint_v1f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
- <1 x float> <float 42.0>,
+ <1 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <1 x float> %rint
}
-define <2 x double> @constrained_vector_rint_v2f64() #0 {
+define <2 x double> @constrained_vector_rint_v2f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_rint_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
-; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: callq rint@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_rint_v2f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT: vroundpd $4, (%rdi), %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
- <2 x double> <double 42.1, double 42.0>,
+ <2 x double> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %rint
}
-define <3 x float> @constrained_vector_rint_v3f32() #0 {
+define <3 x float> @constrained_vector_rint_v3f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_rint_v3f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq rintf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq rintf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq rintf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_rint_v3f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
- <3 x float> <float 42.0, float 43.0, float 44.0>,
+ <3 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <3 x float> %rint
}
-define <3 x double> @constrained_vector_rint_v3f64() #0 {
+define <3 x double> @constrained_vector_rint_v3f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_rint_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq rint@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq rint@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq rint@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -3124,70 +3137,76 @@ define <3 x double> @constrained_vector_rint_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_rint_v3f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT: vroundpd $4, (%rdi), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
- <3 x double> <double 42.0, double 42.1, double 42.2>,
+ <3 x double> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <3 x double> %rint
}
-define <4 x double> @constrained_vector_rint_v4f64() #0 {
+define <4 x double> @constrained_vector_rint_v4f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_rint_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm1
+; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps 16(%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: callq rint@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq rint@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq rint@PLT
-; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq rint@PLT
-; CHECK-NEXT: movaps %xmm0, %xmm1
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_rint_v4f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
+; AVX-NEXT: vroundpd $4, (%rdi), %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <4 x double>, ptr %a
%rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
- <4 x double> <double 42.1, double 42.2,
- double 42.3, double 42.4>,
+ <4 x double> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x double> %rint
}
-define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
+define <1 x float> @constrained_vector_nearbyint_v1f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq nearbyintf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -3195,98 +3214,111 @@ define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
;
; AVX-LABEL: constrained_vector_nearbyint_v1f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
- <1 x float> <float 42.0>,
+ <1 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <1 x float> %nearby
}
-define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
+define <2 x double> @constrained_vector_nearbyint_v2f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
-; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: callq nearbyint@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearbyint_v2f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $12, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT: vroundpd $12, (%rdi), %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
- <2 x double> <double 42.1, double 42.0>,
+ <2 x double> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %nearby
}
-define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
+define <3 x float> @constrained_vector_nearbyint_v3f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v3f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq nearbyintf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq nearbyintf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq nearbyintf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearbyint_v3f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $12, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
- <3 x float> <float 42.0, float 43.0, float 44.0>,
+ <3 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <3 x float> %nearby
}
-define <3 x double> @constrained_vector_nearby_v3f64() #0 {
+define <3 x double> @constrained_vector_nearby_v3f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_nearby_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq nearbyint@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq nearbyint@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -3295,59 +3327,65 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearby_v3f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $12, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT: vroundpd $12, (%rdi), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
- <3 x double> <double 42.0, double 42.1, double 42.2>,
+ <3 x double> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <3 x double> %nearby
}
-define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
+define <4 x double> @constrained_vector_nearbyint_v4f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm1
+; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps 16(%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: callq nearbyint@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq nearbyint@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq nearbyint@PLT
-; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq nearbyint@PLT
-; CHECK-NEXT: movaps %xmm0, %xmm1
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearbyint_v4f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $12, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
+; AVX-NEXT: vroundpd $12, (%rdi), %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <4 x double>, ptr %a
%nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
- <4 x double> <double 42.1, double 42.2,
- double 42.3, double 42.4>,
+ <4 x double> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x double> %nearby
@@ -5620,12 +5658,12 @@ entry:
ret <4 x double> %result
}
-define <1 x float> @constrained_vector_ceil_v1f32() #0 {
+define <1 x float> @constrained_vector_ceil_v1f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_ceil_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq ceilf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -5633,95 +5671,108 @@ define <1 x float> @constrained_vector_ceil_v1f32() #0 {
;
; AVX-LABEL: constrained_vector_ceil_v1f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
- <1 x float> <float 1.5>,
+ <1 x float> %b,
metadata !"fpexcept.strict") #0
ret <1 x float> %ceil
}
-define <2 x double> @constrained_vector_ceil_v2f64() #0 {
+define <2 x double> @constrained_vector_ceil_v2f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_ceil_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT: callq ceil@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
; CHECK-NEXT: callq ceil@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq ceil@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_ceil_v2f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $10, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT: vroundpd $10, (%rdi), %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
- <2 x double> <double 1.1, double 1.9>,
+ <2 x double> %b,
metadata !"fpexcept.strict") #0
ret <2 x double> %ceil
}
-define <3 x float> @constrained_vector_ceil_v3f32() #0 {
+define <3 x float> @constrained_vector_ceil_v3f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_ceil_v3f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq ceilf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq ceilf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq ceilf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_ceil_v3f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $10, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $10, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
- <3 x float> <float 1.5, float 2.5, float 3.5>,
+ <3 x float> %b,
metadata !"fpexcept.strict") #0
ret <3 x float> %ceil
}
-define <3 x double> @constrained_vector_ceil_v3f64() #0 {
+define <3 x double> @constrained_vector_ceil_v3f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_ceil_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq ceil@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq ceil@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq ceil@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -5730,30 +5781,31 @@ define <3 x double> @constrained_vector_ceil_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_ceil_v3f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $10, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT: vroundpd $10, (%rdi), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
- <3 x double> <double 1.1, double 1.9, double 1.5>,
+ <3 x double> %b,
metadata !"fpexcept.strict") #0
ret <3 x double> %ceil
}
-define <1 x float> @constrained_vector_floor_v1f32() #0 {
+define <1 x float> @constrained_vector_floor_v1f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_floor_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq floorf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -5761,96 +5813,109 @@ define <1 x float> @constrained_vector_floor_v1f32() #0 {
;
; AVX-LABEL: constrained_vector_floor_v1f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
- <1 x float> <float 1.5>,
+ <1 x float> %b,
metadata !"fpexcept.strict") #0
ret <1 x float> %floor
}
-define <2 x double> @constrained_vector_floor_v2f64() #0 {
+define <2 x double> @constrained_vector_floor_v2f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_floor_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT: callq floor@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
; CHECK-NEXT: callq floor@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq floor@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_floor_v2f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $9, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT: vroundpd $9, (%rdi), %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
- <2 x double> <double 1.1, double 1.9>,
+ <2 x double> %b,
metadata !"fpexcept.strict") #0
ret <2 x double> %floor
}
-define <3 x float> @constrained_vector_floor_v3f32() #0 {
+define <3 x float> @constrained_vector_floor_v3f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_floor_v3f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq floorf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq floorf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq floorf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_floor_v3f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $9, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $9, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
- <3 x float> <float 1.5, float 2.5, float 3.5>,
+ <3 x float> %b,
metadata !"fpexcept.strict") #0
ret <3 x float> %floor
}
-define <3 x double> @constrained_vector_floor_v3f64() #0 {
+define <3 x double> @constrained_vector_floor_v3f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_floor_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq floor@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq floor@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq floor@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -5859,30 +5924,31 @@ define <3 x double> @constrained_vector_floor_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_floor_v3f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $9, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT: vroundpd $9, (%rdi), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
- <3 x double> <double 1.1, double 1.9, double 1.5>,
+ <3 x double> %b,
metadata !"fpexcept.strict") #0
ret <3 x double> %floor
}
-define <1 x float> @constrained_vector_round_v1f32() #0 {
+define <1 x float> @constrained_vector_round_v1f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_round_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq roundf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -5892,116 +5958,141 @@ define <1 x float> @constrained_vector_round_v1f32() #0 {
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq roundf@PLT
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
- <1 x float> <float 1.5>,
+ <1 x float> %b,
metadata !"fpexcept.strict") #0
ret <1 x float> %round
}
-define <2 x double> @constrained_vector_round_v2f64() #0 {
+define <2 x double> @constrained_vector_round_v2f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_round_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT: callq round@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
; CHECK-NEXT: callq round@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq round@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_round_v2f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: subq $24, %rsp
-; AVX-NEXT: .cfi_def_cfa_offset 32
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq round@PLT
-; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; AVX-NEXT: # xmm0 = mem[0],zero
; AVX-NEXT: callq round@PLT
-; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
- <2 x double> <double 1.1, double 1.9>,
+ <2 x double> %b,
metadata !"fpexcept.strict") #0
ret <2 x double> %round
}
-define <3 x float> @constrained_vector_round_v3f32() #0 {
+define <3 x float> @constrained_vector_round_v3f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_round_v3f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq roundf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq roundf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq roundf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_round_v3f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: subq $40, %rsp
-; AVX-NEXT: .cfi_def_cfa_offset 48
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: subq $48, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 64
+; AVX-NEXT: .cfi_offset %rbx, -16
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq roundf@PLT
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; AVX-NEXT: callq roundf@PLT
-; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq roundf@PLT
-; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: addq $48, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: popq %rbx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
- <3 x float> <float 1.5, float 2.5, float 3.5>,
+ <3 x float> %b,
metadata !"fpexcept.strict") #0
ret <3 x float> %round
}
-define <3 x double> @constrained_vector_round_v3f64() #0 {
+define <3 x double> @constrained_vector_round_v3f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_round_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq round@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq round@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq round@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -6010,43 +6101,53 @@ define <3 x double> @constrained_vector_round_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_round_v3f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: subq $40, %rsp
-; AVX-NEXT: .cfi_def_cfa_offset 48
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: subq $48, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 64
+; AVX-NEXT: .cfi_offset %rbx, -16
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq round@PLT
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; AVX-NEXT: # xmm0 = mem[0],zero
; AVX-NEXT: callq round@PLT
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq round@PLT
-; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: addq $48, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: popq %rbx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
- <3 x double> <double 1.1, double 1.9, double 1.5>,
+ <3 x double> %b,
metadata !"fpexcept.strict") #0
ret <3 x double> %round
}
-define <1 x float> @constrained_vector_trunc_v1f32() #0 {
+define <1 x float> @constrained_vector_trunc_v1f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_trunc_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq truncf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -6054,95 +6155,108 @@ define <1 x float> @constrained_vector_trunc_v1f32() #0 {
;
; AVX-LABEL: constrained_vector_trunc_v1f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
- <1 x float> <float 1.5>,
+ <1 x float> %b,
metadata !"fpexcept.strict") #0
ret <1 x float> %trunc
}
-define <2 x double> @constrained_vector_trunc_v2f64() #0 {
+define <2 x double> @constrained_vector_trunc_v2f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_trunc_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT: callq trunc@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
; CHECK-NEXT: callq trunc@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq trunc@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_trunc_v2f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $11, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT: vroundpd $11, (%rdi), %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
- <2 x double> <double 1.1, double 1.9>,
+ <2 x double> %b,
metadata !"fpexcept.strict") #0
ret <2 x double> %trunc
}
-define <3 x float> @constrained_vector_trunc_v3f32() #0 {
+define <3 x float> @constrained_vector_trunc_v3f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_trunc_v3f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq truncf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq truncf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq truncf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_trunc_v3f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $11, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $11, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
- <3 x float> <float 1.5, float 2.5, float 3.5>,
+ <3 x float> %b,
metadata !"fpexcept.strict") #0
ret <3 x float> %trunc
}
-define <3 x double> @constrained_vector_trunc_v3f64() #0 {
+define <3 x double> @constrained_vector_trunc_v3f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_trunc_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq trunc@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq trunc@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq trunc@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -6151,20 +6265,21 @@ define <3 x double> @constrained_vector_trunc_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_trunc_v3f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $11, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT: vroundpd $11, (%rdi), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
- <3 x double> <double 1.1, double 1.9, double 1.5>,
+ <3 x double> %b,
metadata !"fpexcept.strict") #0
ret <3 x double> %trunc
}