[llvm] 809c5ac - [X86] Modify tests for constrained rounding functions (#116951)

via llvm-commits <llvm-commits@lists.llvm.org>
Mon Nov 25 06:39:38 PST 2024


Author: Serge Pavlov
Date: 2024-11-25T21:39:35+07:00
New Revision: 809c5ac3b0d78f504d93717ac4c0a02816cf47bb

URL: https://github.com/llvm/llvm-project/commit/809c5ac3b0d78f504d93717ac4c0a02816cf47bb
DIFF: https://github.com/llvm/llvm-project/commit/809c5ac3b0d78f504d93717ac4c0a02816cf47bb.diff

LOG: [X86] Modify tests for constrained rounding functions (#116951)

The existing tests for constrained functions often use constant
arguments. If constant evaluation is improved, such tests will no longer
check code generation of the tested functions. To avoid this, the tests
are modified to use values loaded from memory instead of constants. For
now, only the tests for rounding functions are changed.
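
To illustrate the pattern, here is a minimal sketch (the function names
are hypothetical and not part of this patch). With a literal constant
operand, an improved constant folder could evaluate the intrinsic at
compile time, leaving no rounding code to check; loading the operand from
a pointer argument forces the backend to emit the rounding operation:

; Before: constant operand, foldable if constant evaluation improves.
define <1 x float> @example_const() #0 {
entry:
  %r = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
                          <1 x float> <float 42.0>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <1 x float> %r
}

; After: the operand is loaded from memory, so code generation for the
; constrained intrinsic is always exercised.
define <1 x float> @example_var(ptr %a) #0 {
entry:
  %b = load <1 x float>, ptr %a
  %r = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
                          <1 x float> %b,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <1 x float> %r
}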

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 21dfdc3c2abe49..49062eaef31887 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -3011,25 +3011,26 @@ entry:
   ret <4 x double> %log2
 }
 
-define <1 x float> @constrained_vector_rint_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_rint_v1f32:
+define <1 x float> @constrained_vector_rint_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_rint_v1f32_var:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    callq rintf@PLT
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_rint_v1f32:
+; AVX-LABEL: constrained_vector_rint_v1f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <1 x float>, ptr %a
   %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
-                             <1 x float> <float 42.0>,
+                             <1 x float> %b,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
   ret <1 x float> %rint
@@ -3063,42 +3064,77 @@ entry:
   ret <2 x double> %rint
 }
 
-define <3 x float> @constrained_vector_rint_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_rint_v3f32:
+define <2 x double> @constrained_vector_rint_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_rint_v2f64_var:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subq $40, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    callq rint@PLT
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq rint@PLT
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_rint_v2f64_var:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vroundpd $4, (%rdi), %xmm0
+; AVX-NEXT:    retq
+entry:
+  %b = load <2 x double>, ptr %a
+  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+                        <2 x double> %b,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  ret <2 x double> %rint
+}
+
+define <3 x float> @constrained_vector_rint_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_rint_v3f32_var:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $56, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    callq rintf@PLT
 ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; CHECK-NEXT:    callq rintf@PLT
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    callq rintf@PLT
-; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $56, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_rint_v3f32:
+; AVX-LABEL: constrained_vector_rint_v3f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $4, %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
 ; AVX-NEXT:    vroundss $4, %xmm2, %xmm2, %xmm2
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
 ; AVX-NEXT:    retq
 entry:
+  %b = load <3 x float>, ptr %a
   %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              <3 x float> %b,
                               metadata !"round.dynamic",
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %rint
@@ -3143,6 +3179,51 @@ entry:
   ret <3 x double> %rint
 }
 
+define <3 x double> @constrained_vector_rint_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_rint_v3f64_var:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq rint@PLT
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    callq rint@PLT
+; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
+; CHECK-NEXT:    callq rint@PLT
+; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT:    # xmm1 = mem[0],zero
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_rint_v3f64_var:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vroundpd $4, (%rdi), %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %b = load <3 x double>, ptr %a
+  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict") #0
+  ret <3 x double> %rint
+}
+
 define <4 x double> @constrained_vector_rint_v4f64() #0 {
 ; CHECK-LABEL: constrained_vector_rint_v4f64:
 ; CHECK:       # %bb.0: # %entry
@@ -3182,25 +3263,70 @@ entry:
   ret <4 x double> %rint
 }
 
-define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_nearbyint_v1f32:
+define <4 x double> @constrained_vector_rint_v4f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_rint_v4f64_var:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $56, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    movaps (%rdi), %xmm1
+; CHECK-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movaps 16(%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    callq rint@PLT
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq rint@PLT
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    callq rint@PLT
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq rint@PLT
+; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    addq $56, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_rint_v4f64_var:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vroundpd $4, (%rdi), %ymm0
+; AVX-NEXT:    retq
+entry:
+  %b = load <4 x double>, ptr %a
+  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
+                        <4 x double> %b,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  ret <4 x double> %rint
+}
+
+define <1 x float> @constrained_vector_nearbyint_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_nearbyint_v1f32_var:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    callq nearbyintf@PLT
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_nearbyint_v1f32:
+; AVX-LABEL: constrained_vector_nearbyint_v1f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <1 x float>, ptr %a
   %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
-                               <1 x float> <float 42.0>,
+                               <1 x float> %b,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
   ret <1 x float> %nearby
@@ -3234,42 +3360,77 @@ entry:
   ret <2 x double> %nearby
 }
 
-define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_nearbyint_v3f32:
+define <2 x double> @constrained_vector_nearbyint_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_nearbyint_v2f64_var:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subq $40, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    callq nearbyint@PLT
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq nearbyint@PLT
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_nearbyint_v2f64_var:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vroundpd $12, (%rdi), %xmm0
+; AVX-NEXT:    retq
+entry:
+  %b = load <2 x double>, ptr %a
+  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+                                <2 x double> %b,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict") #0
+  ret <2 x double> %nearby
+}
+
+define <3 x float> @constrained_vector_nearbyint_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_nearbyint_v3f32_var:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $56, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    callq nearbyintf@PLT
 ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; CHECK-NEXT:    callq nearbyintf@PLT
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    callq nearbyintf@PLT
-; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $56, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_nearbyint_v3f32:
+; AVX-LABEL: constrained_vector_nearbyint_v3f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $12, %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
 ; AVX-NEXT:    vroundss $12, %xmm2, %xmm2, %xmm2
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
 ; AVX-NEXT:    retq
 entry:
+  %b = load <3 x float>, ptr %a
   %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              <3 x float> %b,
                               metadata !"round.dynamic",
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %nearby
@@ -3314,6 +3475,51 @@ entry:
   ret <3 x double> %nearby
 }
 
+define <3 x double> @constrained_vector_nearbyint_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_nearbyint_v3f64_var:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq nearbyint@PLT
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    callq nearbyint@PLT
+; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
+; CHECK-NEXT:    callq nearbyint@PLT
+; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT:    # xmm1 = mem[0],zero
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_nearbyint_v3f64_var:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vroundpd $12, (%rdi), %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %b = load <3 x double>, ptr %a
+  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict") #0
+  ret <3 x double> %nearby
+}
+
 define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
 ; CHECK-LABEL: constrained_vector_nearbyint_v4f64:
 ; CHECK:       # %bb.0: # %entry
@@ -3353,6 +3559,50 @@ entry:
   ret <4 x double> %nearby
 }
 
+define <4 x double> @constrained_vector_nearbyint_v4f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_nearbyint_v4f64_var:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $56, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    movaps (%rdi), %xmm1
+; CHECK-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movaps 16(%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    callq nearbyint@PLT
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq nearbyint@PLT
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    callq nearbyint@PLT
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq nearbyint@PLT
+; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    addq $56, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_nearbyint_v4f64_var:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vroundpd $12, (%rdi), %ymm0
+; AVX-NEXT:    retq
+entry:
+  %b = load <4 x double>, ptr %a
+  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
+                                <4 x double> %b,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict") #0
+  ret <4 x double> %nearby
+}
+
 define <1 x float> @constrained_vector_maxnum_v1f32() #0 {
 ; CHECK-LABEL: constrained_vector_maxnum_v1f32:
 ; CHECK:       # %bb.0: # %entry
@@ -4482,10 +4732,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 {
 ; CHECK-NEXT:    movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-NEXT:    comiss %xmm0, %xmm2
 ; CHECK-NEXT:    xorps %xmm1, %xmm1
-; CHECK-NEXT:    ja .LBB115_2
+; CHECK-NEXT:    ja .LBB121_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    movaps %xmm2, %xmm1
-; CHECK-NEXT:  .LBB115_2: # %entry
+; CHECK-NEXT:  .LBB121_2: # %entry
 ; CHECK-NEXT:    subss %xmm1, %xmm0
 ; CHECK-NEXT:    cvttss2si %xmm0, %rcx
 ; CHECK-NEXT:    setbe %al
@@ -4500,10 +4750,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 {
 ; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
 ; AVX1-NEXT:    vcomiss %xmm0, %xmm1
 ; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    ja .LBB115_2
+; AVX1-NEXT:    ja .LBB121_2
 ; AVX1-NEXT:  # %bb.1: # %entry
 ; AVX1-NEXT:    vmovaps %xmm1, %xmm2
-; AVX1-NEXT:  .LBB115_2: # %entry
+; AVX1-NEXT:  .LBB121_2: # %entry
 ; AVX1-NEXT:    vsubss %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vcvttss2si %xmm0, %rcx
 ; AVX1-NEXT:    setbe %al
@@ -4531,10 +4781,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
 ; CHECK-NEXT:    comiss %xmm2, %xmm1
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    xorps %xmm3, %xmm3
-; CHECK-NEXT:    ja .LBB116_2
+; CHECK-NEXT:    ja .LBB122_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    movaps %xmm1, %xmm3
-; CHECK-NEXT:  .LBB116_2: # %entry
+; CHECK-NEXT:  .LBB122_2: # %entry
 ; CHECK-NEXT:    subss %xmm3, %xmm2
 ; CHECK-NEXT:    cvttss2si %xmm2, %rax
 ; CHECK-NEXT:    setbe %cl
@@ -4544,10 +4794,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
 ; CHECK-NEXT:    movq %rcx, %xmm2
 ; CHECK-NEXT:    movss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-NEXT:    comiss %xmm3, %xmm1
-; CHECK-NEXT:    ja .LBB116_4
+; CHECK-NEXT:    ja .LBB122_4
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:  .LBB116_4: # %entry
+; CHECK-NEXT:  .LBB122_4: # %entry
 ; CHECK-NEXT:    subss %xmm0, %xmm3
 ; CHECK-NEXT:    cvttss2si %xmm3, %rax
 ; CHECK-NEXT:    setbe %cl
@@ -4565,10 +4815,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
 ; AVX1-NEXT:    vcomiss %xmm2, %xmm0
 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    ja .LBB116_2
+; AVX1-NEXT:    ja .LBB122_2
 ; AVX1-NEXT:  # %bb.1: # %entry
 ; AVX1-NEXT:    vmovaps %xmm0, %xmm3
-; AVX1-NEXT:  .LBB116_2: # %entry
+; AVX1-NEXT:  .LBB122_2: # %entry
 ; AVX1-NEXT:    vsubss %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vcvttss2si %xmm2, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -4578,10 +4828,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
 ; AVX1-NEXT:    vmovq %rcx, %xmm2
 ; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
 ; AVX1-NEXT:    vcomiss %xmm3, %xmm0
-; AVX1-NEXT:    ja .LBB116_4
+; AVX1-NEXT:    ja .LBB122_4
 ; AVX1-NEXT:  # %bb.3: # %entry
 ; AVX1-NEXT:    vmovaps %xmm0, %xmm1
-; AVX1-NEXT:  .LBB116_4: # %entry
+; AVX1-NEXT:  .LBB122_4: # %entry
 ; AVX1-NEXT:    vsubss %xmm1, %xmm3, %xmm0
 ; AVX1-NEXT:    vcvttss2si %xmm0, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -4622,10 +4872,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
 ; CHECK-NEXT:    comiss %xmm2, %xmm1
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    xorps %xmm3, %xmm3
-; CHECK-NEXT:    ja .LBB117_2
+; CHECK-NEXT:    ja .LBB123_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    movaps %xmm1, %xmm3
-; CHECK-NEXT:  .LBB117_2: # %entry
+; CHECK-NEXT:  .LBB123_2: # %entry
 ; CHECK-NEXT:    subss %xmm3, %xmm2
 ; CHECK-NEXT:    cvttss2si %xmm2, %rcx
 ; CHECK-NEXT:    setbe %al
@@ -4635,10 +4885,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
 ; CHECK-NEXT:    movss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-NEXT:    comiss %xmm2, %xmm1
 ; CHECK-NEXT:    xorps %xmm3, %xmm3
-; CHECK-NEXT:    ja .LBB117_4
+; CHECK-NEXT:    ja .LBB123_4
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    movaps %xmm1, %xmm3
-; CHECK-NEXT:  .LBB117_4: # %entry
+; CHECK-NEXT:  .LBB123_4: # %entry
 ; CHECK-NEXT:    subss %xmm3, %xmm2
 ; CHECK-NEXT:    cvttss2si %xmm2, %rcx
 ; CHECK-NEXT:    setbe %dl
@@ -4647,10 +4897,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
 ; CHECK-NEXT:    xorq %rcx, %rdx
 ; CHECK-NEXT:    movss {{.*#+}} xmm2 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-NEXT:    comiss %xmm2, %xmm1
-; CHECK-NEXT:    ja .LBB117_6
+; CHECK-NEXT:    ja .LBB123_6
 ; CHECK-NEXT:  # %bb.5: # %entry
 ; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:  .LBB117_6: # %entry
+; CHECK-NEXT:  .LBB123_6: # %entry
 ; CHECK-NEXT:    subss %xmm0, %xmm2
 ; CHECK-NEXT:    cvttss2si %xmm2, %rsi
 ; CHECK-NEXT:    setbe %cl
@@ -4666,10 +4916,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
 ; AVX1-NEXT:    vcomiss %xmm2, %xmm0
 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    ja .LBB117_2
+; AVX1-NEXT:    ja .LBB123_2
 ; AVX1-NEXT:  # %bb.1: # %entry
 ; AVX1-NEXT:    vmovaps %xmm0, %xmm3
-; AVX1-NEXT:  .LBB117_2: # %entry
+; AVX1-NEXT:  .LBB123_2: # %entry
 ; AVX1-NEXT:    vsubss %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vcvttss2si %xmm2, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -4680,10 +4930,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
 ; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
 ; AVX1-NEXT:    vcomiss %xmm3, %xmm0
 ; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    ja .LBB117_4
+; AVX1-NEXT:    ja .LBB123_4
 ; AVX1-NEXT:  # %bb.3: # %entry
 ; AVX1-NEXT:    vmovaps %xmm0, %xmm4
-; AVX1-NEXT:  .LBB117_4: # %entry
+; AVX1-NEXT:  .LBB123_4: # %entry
 ; AVX1-NEXT:    vsubss %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vcvttss2si %xmm3, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -4694,10 +4944,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
 ; AVX1-NEXT:    vcomiss %xmm3, %xmm0
-; AVX1-NEXT:    ja .LBB117_6
+; AVX1-NEXT:    ja .LBB123_6
 ; AVX1-NEXT:  # %bb.5: # %entry
 ; AVX1-NEXT:    vmovaps %xmm0, %xmm1
-; AVX1-NEXT:  .LBB117_6: # %entry
+; AVX1-NEXT:  .LBB123_6: # %entry
 ; AVX1-NEXT:    vsubss %xmm1, %xmm3, %xmm0
 ; AVX1-NEXT:    vcvttss2si %xmm0, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -4735,10 +4985,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
 ; CHECK-NEXT:    comiss %xmm0, %xmm2
 ; CHECK-NEXT:    xorps %xmm1, %xmm1
 ; CHECK-NEXT:    xorps %xmm3, %xmm3
-; CHECK-NEXT:    ja .LBB118_2
+; CHECK-NEXT:    ja .LBB124_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    movaps %xmm2, %xmm3
-; CHECK-NEXT:  .LBB118_2: # %entry
+; CHECK-NEXT:  .LBB124_2: # %entry
 ; CHECK-NEXT:    subss %xmm3, %xmm0
 ; CHECK-NEXT:    cvttss2si %xmm0, %rcx
 ; CHECK-NEXT:    setbe %al
@@ -4748,10 +4998,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-NEXT:    comiss %xmm0, %xmm2
 ; CHECK-NEXT:    xorps %xmm4, %xmm4
-; CHECK-NEXT:    ja .LBB118_4
+; CHECK-NEXT:    ja .LBB124_4
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    movaps %xmm2, %xmm4
-; CHECK-NEXT:  .LBB118_4: # %entry
+; CHECK-NEXT:  .LBB124_4: # %entry
 ; CHECK-NEXT:    movq %rax, %xmm3
 ; CHECK-NEXT:    subss %xmm4, %xmm0
 ; CHECK-NEXT:    cvttss2si %xmm0, %rax
@@ -4763,10 +5013,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
 ; CHECK-NEXT:    movss {{.*#+}} xmm4 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-NEXT:    comiss %xmm4, %xmm2
 ; CHECK-NEXT:    xorps %xmm5, %xmm5
-; CHECK-NEXT:    ja .LBB118_6
+; CHECK-NEXT:    ja .LBB124_6
 ; CHECK-NEXT:  # %bb.5: # %entry
 ; CHECK-NEXT:    movaps %xmm2, %xmm5
-; CHECK-NEXT:  .LBB118_6: # %entry
+; CHECK-NEXT:  .LBB124_6: # %entry
 ; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; CHECK-NEXT:    subss %xmm5, %xmm4
 ; CHECK-NEXT:    cvttss2si %xmm4, %rax
@@ -4777,10 +5027,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
 ; CHECK-NEXT:    movq %rcx, %xmm3
 ; CHECK-NEXT:    movss {{.*#+}} xmm4 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-NEXT:    comiss %xmm4, %xmm2
-; CHECK-NEXT:    ja .LBB118_8
+; CHECK-NEXT:    ja .LBB124_8
 ; CHECK-NEXT:  # %bb.7: # %entry
 ; CHECK-NEXT:    movaps %xmm2, %xmm1
-; CHECK-NEXT:  .LBB118_8: # %entry
+; CHECK-NEXT:  .LBB124_8: # %entry
 ; CHECK-NEXT:    subss %xmm1, %xmm4
 ; CHECK-NEXT:    cvttss2si %xmm4, %rax
 ; CHECK-NEXT:    setbe %cl
@@ -4798,10 +5048,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
 ; AVX1-NEXT:    vcomiss %xmm2, %xmm0
 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    ja .LBB118_2
+; AVX1-NEXT:    ja .LBB124_2
 ; AVX1-NEXT:  # %bb.1: # %entry
 ; AVX1-NEXT:    vmovaps %xmm0, %xmm3
-; AVX1-NEXT:  .LBB118_2: # %entry
+; AVX1-NEXT:  .LBB124_2: # %entry
 ; AVX1-NEXT:    vsubss %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vcvttss2si %xmm2, %rcx
 ; AVX1-NEXT:    setbe %al
@@ -4811,10 +5061,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
 ; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
 ; AVX1-NEXT:    vcomiss %xmm3, %xmm0
 ; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    ja .LBB118_4
+; AVX1-NEXT:    ja .LBB124_4
 ; AVX1-NEXT:  # %bb.3: # %entry
 ; AVX1-NEXT:    vmovaps %xmm0, %xmm4
-; AVX1-NEXT:  .LBB118_4: # %entry
+; AVX1-NEXT:  .LBB124_4: # %entry
 ; AVX1-NEXT:    vmovq %rax, %xmm2
 ; AVX1-NEXT:    vsubss %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vcvttss2si %xmm3, %rax
@@ -4826,10 +5076,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
 ; AVX1-NEXT:    vmovss {{.*#+}} xmm4 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
 ; AVX1-NEXT:    vcomiss %xmm4, %xmm0
 ; AVX1-NEXT:    vxorps %xmm5, %xmm5, %xmm5
-; AVX1-NEXT:    ja .LBB118_6
+; AVX1-NEXT:    ja .LBB124_6
 ; AVX1-NEXT:  # %bb.5: # %entry
 ; AVX1-NEXT:    vmovaps %xmm0, %xmm5
-; AVX1-NEXT:  .LBB118_6: # %entry
+; AVX1-NEXT:  .LBB124_6: # %entry
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; AVX1-NEXT:    vsubss %xmm5, %xmm4, %xmm3
 ; AVX1-NEXT:    vcvttss2si %xmm3, %rax
@@ -4840,10 +5090,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
 ; AVX1-NEXT:    vmovq %rcx, %xmm3
 ; AVX1-NEXT:    vmovss {{.*#+}} xmm4 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
 ; AVX1-NEXT:    vcomiss %xmm4, %xmm0
-; AVX1-NEXT:    ja .LBB118_8
+; AVX1-NEXT:    ja .LBB124_8
 ; AVX1-NEXT:  # %bb.7: # %entry
 ; AVX1-NEXT:    vmovaps %xmm0, %xmm1
-; AVX1-NEXT:  .LBB118_8: # %entry
+; AVX1-NEXT:  .LBB124_8: # %entry
 ; AVX1-NEXT:    vsubss %xmm1, %xmm4, %xmm0
 ; AVX1-NEXT:    vcvttss2si %xmm0, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -5036,10 +5286,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 {
 ; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
 ; CHECK-NEXT:    comisd %xmm0, %xmm2
 ; CHECK-NEXT:    xorpd %xmm1, %xmm1
-; CHECK-NEXT:    ja .LBB123_2
+; CHECK-NEXT:    ja .LBB129_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    movapd %xmm2, %xmm1
-; CHECK-NEXT:  .LBB123_2: # %entry
+; CHECK-NEXT:  .LBB129_2: # %entry
 ; CHECK-NEXT:    subsd %xmm1, %xmm0
 ; CHECK-NEXT:    cvttsd2si %xmm0, %rcx
 ; CHECK-NEXT:    setbe %al
@@ -5054,10 +5304,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 {
 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
 ; AVX1-NEXT:    vcomisd %xmm0, %xmm1
 ; AVX1-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    ja .LBB123_2
+; AVX1-NEXT:    ja .LBB129_2
 ; AVX1-NEXT:  # %bb.1: # %entry
 ; AVX1-NEXT:    vmovapd %xmm1, %xmm2
-; AVX1-NEXT:  .LBB123_2: # %entry
+; AVX1-NEXT:  .LBB129_2: # %entry
 ; AVX1-NEXT:    vsubsd %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vcvttsd2si %xmm0, %rcx
 ; AVX1-NEXT:    setbe %al
@@ -5085,10 +5335,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
 ; CHECK-NEXT:    comisd %xmm2, %xmm1
 ; CHECK-NEXT:    xorpd %xmm0, %xmm0
 ; CHECK-NEXT:    xorpd %xmm3, %xmm3
-; CHECK-NEXT:    ja .LBB124_2
+; CHECK-NEXT:    ja .LBB130_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    movapd %xmm1, %xmm3
-; CHECK-NEXT:  .LBB124_2: # %entry
+; CHECK-NEXT:  .LBB130_2: # %entry
 ; CHECK-NEXT:    subsd %xmm3, %xmm2
 ; CHECK-NEXT:    cvttsd2si %xmm2, %rax
 ; CHECK-NEXT:    setbe %cl
@@ -5098,10 +5348,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
 ; CHECK-NEXT:    movq %rcx, %xmm2
 ; CHECK-NEXT:    movsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0]
 ; CHECK-NEXT:    comisd %xmm3, %xmm1
-; CHECK-NEXT:    ja .LBB124_4
+; CHECK-NEXT:    ja .LBB130_4
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    movapd %xmm1, %xmm0
-; CHECK-NEXT:  .LBB124_4: # %entry
+; CHECK-NEXT:  .LBB130_4: # %entry
 ; CHECK-NEXT:    subsd %xmm0, %xmm3
 ; CHECK-NEXT:    cvttsd2si %xmm3, %rax
 ; CHECK-NEXT:    setbe %cl
@@ -5119,10 +5369,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
 ; AVX1-NEXT:    vcomisd %xmm2, %xmm0
 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    ja .LBB124_2
+; AVX1-NEXT:    ja .LBB130_2
 ; AVX1-NEXT:  # %bb.1: # %entry
 ; AVX1-NEXT:    vmovapd %xmm0, %xmm3
-; AVX1-NEXT:  .LBB124_2: # %entry
+; AVX1-NEXT:  .LBB130_2: # %entry
 ; AVX1-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vcvttsd2si %xmm2, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -5132,10 +5382,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
 ; AVX1-NEXT:    vmovq %rcx, %xmm2
 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0]
 ; AVX1-NEXT:    vcomisd %xmm3, %xmm0
-; AVX1-NEXT:    ja .LBB124_4
+; AVX1-NEXT:    ja .LBB130_4
 ; AVX1-NEXT:  # %bb.3: # %entry
 ; AVX1-NEXT:    vmovapd %xmm0, %xmm1
-; AVX1-NEXT:  .LBB124_4: # %entry
+; AVX1-NEXT:  .LBB130_4: # %entry
 ; AVX1-NEXT:    vsubsd %xmm1, %xmm3, %xmm0
 ; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -5177,10 +5427,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
 ; CHECK-NEXT:    comisd %xmm2, %xmm1
 ; CHECK-NEXT:    xorpd %xmm0, %xmm0
 ; CHECK-NEXT:    xorpd %xmm3, %xmm3
-; CHECK-NEXT:    ja .LBB125_2
+; CHECK-NEXT:    ja .LBB131_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    movapd %xmm1, %xmm3
-; CHECK-NEXT:  .LBB125_2: # %entry
+; CHECK-NEXT:  .LBB131_2: # %entry
 ; CHECK-NEXT:    subsd %xmm3, %xmm2
 ; CHECK-NEXT:    cvttsd2si %xmm2, %rcx
 ; CHECK-NEXT:    setbe %al
@@ -5190,10 +5440,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
 ; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0]
 ; CHECK-NEXT:    comisd %xmm2, %xmm1
 ; CHECK-NEXT:    xorpd %xmm3, %xmm3
-; CHECK-NEXT:    ja .LBB125_4
+; CHECK-NEXT:    ja .LBB131_4
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    movapd %xmm1, %xmm3
-; CHECK-NEXT:  .LBB125_4: # %entry
+; CHECK-NEXT:  .LBB131_4: # %entry
 ; CHECK-NEXT:    subsd %xmm3, %xmm2
 ; CHECK-NEXT:    cvttsd2si %xmm2, %rcx
 ; CHECK-NEXT:    setbe %dl
@@ -5202,10 +5452,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
 ; CHECK-NEXT:    xorq %rcx, %rdx
 ; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [4.2299999999999997E+1,0.0E+0]
 ; CHECK-NEXT:    comisd %xmm2, %xmm1
-; CHECK-NEXT:    ja .LBB125_6
+; CHECK-NEXT:    ja .LBB131_6
 ; CHECK-NEXT:  # %bb.5: # %entry
 ; CHECK-NEXT:    movapd %xmm1, %xmm0
-; CHECK-NEXT:  .LBB125_6: # %entry
+; CHECK-NEXT:  .LBB131_6: # %entry
 ; CHECK-NEXT:    subsd %xmm0, %xmm2
 ; CHECK-NEXT:    cvttsd2si %xmm2, %rsi
 ; CHECK-NEXT:    setbe %cl
@@ -5221,10 +5471,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
 ; AVX1-NEXT:    vcomisd %xmm2, %xmm0
 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    ja .LBB125_2
+; AVX1-NEXT:    ja .LBB131_2
 ; AVX1-NEXT:  # %bb.1: # %entry
 ; AVX1-NEXT:    vmovapd %xmm0, %xmm3
-; AVX1-NEXT:  .LBB125_2: # %entry
+; AVX1-NEXT:  .LBB131_2: # %entry
 ; AVX1-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vcvttsd2si %xmm2, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -5235,10 +5485,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0]
 ; AVX1-NEXT:    vcomisd %xmm3, %xmm0
 ; AVX1-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    ja .LBB125_4
+; AVX1-NEXT:    ja .LBB131_4
 ; AVX1-NEXT:  # %bb.3: # %entry
 ; AVX1-NEXT:    vmovapd %xmm0, %xmm4
-; AVX1-NEXT:  .LBB125_4: # %entry
+; AVX1-NEXT:  .LBB131_4: # %entry
 ; AVX1-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -5249,10 +5499,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0]
 ; AVX1-NEXT:    vcomisd %xmm3, %xmm0
-; AVX1-NEXT:    ja .LBB125_6
+; AVX1-NEXT:    ja .LBB131_6
 ; AVX1-NEXT:  # %bb.5: # %entry
 ; AVX1-NEXT:    vmovapd %xmm0, %xmm1
-; AVX1-NEXT:  .LBB125_6: # %entry
+; AVX1-NEXT:  .LBB131_6: # %entry
 ; AVX1-NEXT:    vsubsd %xmm1, %xmm3, %xmm0
 ; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -5290,10 +5540,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
 ; CHECK-NEXT:    comisd %xmm0, %xmm2
 ; CHECK-NEXT:    xorpd %xmm1, %xmm1
 ; CHECK-NEXT:    xorpd %xmm3, %xmm3
-; CHECK-NEXT:    ja .LBB126_2
+; CHECK-NEXT:    ja .LBB132_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    movapd %xmm2, %xmm3
-; CHECK-NEXT:  .LBB126_2: # %entry
+; CHECK-NEXT:  .LBB132_2: # %entry
 ; CHECK-NEXT:    subsd %xmm3, %xmm0
 ; CHECK-NEXT:    cvttsd2si %xmm0, %rcx
 ; CHECK-NEXT:    setbe %al
@@ -5303,10 +5553,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
 ; CHECK-NEXT:    comisd %xmm0, %xmm2
 ; CHECK-NEXT:    xorpd %xmm4, %xmm4
-; CHECK-NEXT:    ja .LBB126_4
+; CHECK-NEXT:    ja .LBB132_4
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    movapd %xmm2, %xmm4
-; CHECK-NEXT:  .LBB126_4: # %entry
+; CHECK-NEXT:  .LBB132_4: # %entry
 ; CHECK-NEXT:    movq %rax, %xmm3
 ; CHECK-NEXT:    subsd %xmm4, %xmm0
 ; CHECK-NEXT:    cvttsd2si %xmm0, %rax
@@ -5318,10 +5568,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
 ; CHECK-NEXT:    movsd {{.*#+}} xmm4 = [4.2399999999999999E+1,0.0E+0]
 ; CHECK-NEXT:    comisd %xmm4, %xmm2
 ; CHECK-NEXT:    xorpd %xmm5, %xmm5
-; CHECK-NEXT:    ja .LBB126_6
+; CHECK-NEXT:    ja .LBB132_6
 ; CHECK-NEXT:  # %bb.5: # %entry
 ; CHECK-NEXT:    movapd %xmm2, %xmm5
-; CHECK-NEXT:  .LBB126_6: # %entry
+; CHECK-NEXT:  .LBB132_6: # %entry
 ; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; CHECK-NEXT:    subsd %xmm5, %xmm4
 ; CHECK-NEXT:    cvttsd2si %xmm4, %rax
@@ -5332,10 +5582,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
 ; CHECK-NEXT:    movq %rcx, %xmm3
 ; CHECK-NEXT:    movsd {{.*#+}} xmm4 = [4.2299999999999997E+1,0.0E+0]
 ; CHECK-NEXT:    comisd %xmm4, %xmm2
-; CHECK-NEXT:    ja .LBB126_8
+; CHECK-NEXT:    ja .LBB132_8
 ; CHECK-NEXT:  # %bb.7: # %entry
 ; CHECK-NEXT:    movapd %xmm2, %xmm1
-; CHECK-NEXT:  .LBB126_8: # %entry
+; CHECK-NEXT:  .LBB132_8: # %entry
 ; CHECK-NEXT:    subsd %xmm1, %xmm4
 ; CHECK-NEXT:    cvttsd2si %xmm4, %rax
 ; CHECK-NEXT:    setbe %cl
@@ -5353,10 +5603,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
 ; AVX1-NEXT:    vcomisd %xmm2, %xmm0
 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    ja .LBB126_2
+; AVX1-NEXT:    ja .LBB132_2
 ; AVX1-NEXT:  # %bb.1: # %entry
 ; AVX1-NEXT:    vmovapd %xmm0, %xmm3
-; AVX1-NEXT:  .LBB126_2: # %entry
+; AVX1-NEXT:  .LBB132_2: # %entry
 ; AVX1-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vcvttsd2si %xmm2, %rcx
 ; AVX1-NEXT:    setbe %al
@@ -5366,10 +5616,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0]
 ; AVX1-NEXT:    vcomisd %xmm3, %xmm0
 ; AVX1-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    ja .LBB126_4
+; AVX1-NEXT:    ja .LBB132_4
 ; AVX1-NEXT:  # %bb.3: # %entry
 ; AVX1-NEXT:    vmovapd %xmm0, %xmm4
-; AVX1-NEXT:  .LBB126_4: # %entry
+; AVX1-NEXT:  .LBB132_4: # %entry
 ; AVX1-NEXT:    vmovq %rax, %xmm2
 ; AVX1-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
@@ -5381,10 +5631,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm4 = [4.2200000000000003E+1,0.0E+0]
 ; AVX1-NEXT:    vcomisd %xmm4, %xmm0
 ; AVX1-NEXT:    vxorpd %xmm5, %xmm5, %xmm5
-; AVX1-NEXT:    ja .LBB126_6
+; AVX1-NEXT:    ja .LBB132_6
 ; AVX1-NEXT:  # %bb.5: # %entry
 ; AVX1-NEXT:    vmovapd %xmm0, %xmm5
-; AVX1-NEXT:  .LBB126_6: # %entry
+; AVX1-NEXT:  .LBB132_6: # %entry
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; AVX1-NEXT:    vsubsd %xmm5, %xmm4, %xmm3
 ; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
@@ -5395,10 +5645,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
 ; AVX1-NEXT:    vmovq %rcx, %xmm3
 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm4 = [4.2100000000000001E+1,0.0E+0]
 ; AVX1-NEXT:    vcomisd %xmm4, %xmm0
-; AVX1-NEXT:    ja .LBB126_8
+; AVX1-NEXT:    ja .LBB132_8
 ; AVX1-NEXT:  # %bb.7: # %entry
 ; AVX1-NEXT:    vmovapd %xmm0, %xmm1
-; AVX1-NEXT:  .LBB126_8: # %entry
+; AVX1-NEXT:  .LBB132_8: # %entry
 ; AVX1-NEXT:    vsubsd %xmm1, %xmm4, %xmm0
 ; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
 ; AVX1-NEXT:    setbe %cl
@@ -5620,108 +5870,121 @@ entry:
   ret <4 x double> %result
 }
 
-define <1 x float> @constrained_vector_ceil_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_ceil_v1f32:
+define <1 x float> @constrained_vector_ceil_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_ceil_v1f32_var:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    callq ceilf@PLT
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_ceil_v1f32:
+; AVX-LABEL: constrained_vector_ceil_v1f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <1 x float>, ptr %a
   %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
-                               <1 x float> <float 1.5>,
+                               <1 x float> %b,
                                metadata !"fpexcept.strict") #0
   ret <1 x float> %ceil
 }
 
-define <2 x double> @constrained_vector_ceil_v2f64() #0 {
-; CHECK-LABEL: constrained_vector_ceil_v2f64:
+define <2 x double> @constrained_vector_ceil_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_ceil_v2f64_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT:    callq ceil@PLT
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movaps (%rdi), %xmm0
 ; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
 ; CHECK-NEXT:    callq ceil@PLT
-; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq ceil@PLT
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    addq $40, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_ceil_v2f64:
+; AVX-LABEL: constrained_vector_ceil_v2f64_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vroundpd $10, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT:    vroundpd $10, (%rdi), %xmm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <2 x double>, ptr %a
   %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
+                                <2 x double> %b,
                                 metadata !"fpexcept.strict") #0
   ret <2 x double> %ceil
 }
 
-define <3 x float> @constrained_vector_ceil_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_ceil_v3f32:
+define <3 x float> @constrained_vector_ceil_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_ceil_v3f32_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    subq $56, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    callq ceilf@PLT
 ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; CHECK-NEXT:    callq ceilf@PLT
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    callq ceilf@PLT
-; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $56, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_ceil_v3f32:
+; AVX-LABEL: constrained_vector_ceil_v3f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $10, %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
 ; AVX-NEXT:    vroundss $10, %xmm2, %xmm2, %xmm2
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
 ; AVX-NEXT:    retq
 entry:
+  %b = load <3 x float>, ptr %a
   %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              <3 x float> %b,
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %ceil
 }
 
-define <3 x double> @constrained_vector_ceil_v3f64() #0 {
-; CHECK-LABEL: constrained_vector_ceil_v3f64:
+define <3 x double> @constrained_vector_ceil_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_ceil_v3f64_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    callq ceil@PLT
 ; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    callq ceil@PLT
 ; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
 ; CHECK-NEXT:    callq ceil@PLT
 ; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
@@ -5730,127 +5993,141 @@ define <3 x double> @constrained_vector_ceil_v3f64() #0 {
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
 ; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
 ; CHECK-NEXT:    # xmm1 = mem[0],zero
-; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    addq $40, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_ceil_v3f64:
+; AVX-LABEL: constrained_vector_ceil_v3f64_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vroundpd $10, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT:    vroundpd $10, (%rdi), %xmm1
 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <3 x double>, ptr %a
   %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          <3 x double> %b,
                           metadata !"fpexcept.strict") #0
   ret <3 x double> %ceil
 }
 
-define <1 x float> @constrained_vector_floor_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_floor_v1f32:
+define <1 x float> @constrained_vector_floor_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_floor_v1f32_var:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    callq floorf@PLT
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_floor_v1f32:
+; AVX-LABEL: constrained_vector_floor_v1f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <1 x float>, ptr %a
   %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
-                               <1 x float> <float 1.5>,
+                               <1 x float> %b,
                                metadata !"fpexcept.strict") #0
   ret <1 x float> %floor
 }
 
 
-define <2 x double> @constrained_vector_floor_v2f64() #0 {
-; CHECK-LABEL: constrained_vector_floor_v2f64:
+define <2 x double> @constrained_vector_floor_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_floor_v2f64_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT:    callq floor@PLT
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movaps (%rdi), %xmm0
 ; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
 ; CHECK-NEXT:    callq floor@PLT
-; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq floor@PLT
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    addq $40, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_floor_v2f64:
+; AVX-LABEL: constrained_vector_floor_v2f64_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vroundpd $9, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT:    vroundpd $9, (%rdi), %xmm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <2 x double>, ptr %a
   %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
+                                <2 x double> %b,
                                 metadata !"fpexcept.strict") #0
   ret <2 x double> %floor
 }
 
-define <3 x float> @constrained_vector_floor_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_floor_v3f32:
+define <3 x float> @constrained_vector_floor_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_floor_v3f32_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    subq $56, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    callq floorf@PLT
 ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; CHECK-NEXT:    callq floorf@PLT
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    callq floorf@PLT
-; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $56, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_floor_v3f32:
+; AVX-LABEL: constrained_vector_floor_v3f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $9, %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
 ; AVX-NEXT:    vroundss $9, %xmm2, %xmm2, %xmm2
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
 ; AVX-NEXT:    retq
 entry:
+  %b = load <3 x float>, ptr %a
   %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              <3 x float> %b,
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %floor
 }
 
-define <3 x double> @constrained_vector_floor_v3f64() #0 {
-; CHECK-LABEL: constrained_vector_floor_v3f64:
+define <3 x double> @constrained_vector_floor_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_floor_v3f64_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    callq floor@PLT
 ; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    callq floor@PLT
 ; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
 ; CHECK-NEXT:    callq floor@PLT
 ; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
@@ -5859,149 +6136,175 @@ define <3 x double> @constrained_vector_floor_v3f64() #0 {
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
 ; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
 ; CHECK-NEXT:    # xmm1 = mem[0],zero
-; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    addq $40, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_floor_v3f64:
+; AVX-LABEL: constrained_vector_floor_v3f64_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vroundpd $9, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT:    vroundpd $9, (%rdi), %xmm1
 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <3 x double>, ptr %a
   %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          <3 x double> %b,
                           metadata !"fpexcept.strict") #0
   ret <3 x double> %floor
 }
 
-define <1 x float> @constrained_vector_round_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_round_v1f32:
+define <1 x float> @constrained_vector_round_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_round_v1f32_var:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    callq roundf@PLT
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_round_v1f32:
+; AVX-LABEL: constrained_vector_round_v1f32_var:
 ; AVX:       # %bb.0: # %entry
 ; AVX-NEXT:    pushq %rax
 ; AVX-NEXT:    .cfi_def_cfa_offset 16
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    callq roundf@PLT
 ; AVX-NEXT:    popq %rax
 ; AVX-NEXT:    .cfi_def_cfa_offset 8
 ; AVX-NEXT:    retq
 entry:
+  %b = load <1 x float>, ptr %a
   %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
-                               <1 x float> <float 1.5>,
+                               <1 x float> %b,
                                metadata !"fpexcept.strict") #0
   ret <1 x float> %round
 }
 
-define <2 x double> @constrained_vector_round_v2f64() #0 {
-; CHECK-LABEL: constrained_vector_round_v2f64:
+define <2 x double> @constrained_vector_round_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_round_v2f64_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT:    callq round@PLT
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movaps (%rdi), %xmm0
 ; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
 ; CHECK-NEXT:    callq round@PLT
-; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq round@PLT
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    addq $40, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_round_v2f64:
+; AVX-LABEL: constrained_vector_round_v2f64_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    subq $24, %rsp
-; AVX-NEXT:    .cfi_def_cfa_offset 32
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; AVX-NEXT:    subq $40, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 48
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    callq round@PLT
-; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; AVX-NEXT:    # xmm0 = mem[0],zero
 ; AVX-NEXT:    callq round@PLT
-; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
 ; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
-; AVX-NEXT:    addq $24, %rsp
+; AVX-NEXT:    addq $40, %rsp
 ; AVX-NEXT:    .cfi_def_cfa_offset 8
 ; AVX-NEXT:    retq
 entry:
+  %b = load <2 x double>, ptr %a
   %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
+                                <2 x double> %b,
                                 metadata !"fpexcept.strict") #0
   ret <2 x double> %round
 }
 
-define <3 x float> @constrained_vector_round_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_round_v3f32:
+define <3 x float> @constrained_vector_round_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_round_v3f32_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    subq $56, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    callq roundf@PLT
 ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; CHECK-NEXT:    callq roundf@PLT
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    callq roundf@PLT
-; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $56, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_round_v3f32:
+; AVX-LABEL: constrained_vector_round_v3f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    subq $40, %rsp
-; AVX-NEXT:    .cfi_def_cfa_offset 48
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    pushq %rbx
+; AVX-NEXT:    .cfi_def_cfa_offset 16
+; AVX-NEXT:    subq $48, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 64
+; AVX-NEXT:    .cfi_offset %rbx, -16
+; AVX-NEXT:    movq %rdi, %rbx
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    callq roundf@PLT
 ; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; AVX-NEXT:    callq roundf@PLT
-; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    callq roundf@PLT
-; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
 ; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
 ; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; AVX-NEXT:    addq $40, %rsp
+; AVX-NEXT:    addq $48, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 16
+; AVX-NEXT:    popq %rbx
 ; AVX-NEXT:    .cfi_def_cfa_offset 8
 ; AVX-NEXT:    retq
 entry:
+  %b = load <3 x float>, ptr %a
   %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              <3 x float> %b,
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %round
 }
 
 
-define <3 x double> @constrained_vector_round_v3f64() #0 {
-; CHECK-LABEL: constrained_vector_round_v3f64:
+define <3 x double> @constrained_vector_round_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_round_v3f64_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    callq round@PLT
 ; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    callq round@PLT
 ; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
 ; CHECK-NEXT:    callq round@PLT
 ; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
@@ -6010,139 +6313,162 @@ define <3 x double> @constrained_vector_round_v3f64() #0 {
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
 ; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
 ; CHECK-NEXT:    # xmm1 = mem[0],zero
-; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    addq $40, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_round_v3f64:
+; AVX-LABEL: constrained_vector_round_v3f64_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    subq $40, %rsp
-; AVX-NEXT:    .cfi_def_cfa_offset 48
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; AVX-NEXT:    pushq %rbx
+; AVX-NEXT:    .cfi_def_cfa_offset 16
+; AVX-NEXT:    subq $48, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 64
+; AVX-NEXT:    .cfi_offset %rbx, -16
+; AVX-NEXT:    movq %rdi, %rbx
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    callq round@PLT
 ; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; AVX-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; AVX-NEXT:    # xmm0 = mem[0],zero
 ; AVX-NEXT:    callq round@PLT
 ; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
 ; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
-; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    callq round@PLT
-; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX-NEXT:    addq $40, %rsp
+; AVX-NEXT:    addq $48, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 16
+; AVX-NEXT:    popq %rbx
 ; AVX-NEXT:    .cfi_def_cfa_offset 8
 ; AVX-NEXT:    retq
 entry:
+  %b = load <3 x double>, ptr %a
   %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          <3 x double> %b,
                           metadata !"fpexcept.strict") #0
   ret <3 x double> %round
 }
 
-define <1 x float> @constrained_vector_trunc_v1f32() #0 {
-; CHECK-LABEL: constrained_vector_trunc_v1f32:
+define <1 x float> @constrained_vector_trunc_v1f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_trunc_v1f32_var:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    callq truncf@PLT
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_trunc_v1f32:
+; AVX-LABEL: constrained_vector_trunc_v1f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <1 x float>, ptr %a
   %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
-                               <1 x float> <float 1.5>,
+                               <1 x float> %b,
                                metadata !"fpexcept.strict") #0
   ret <1 x float> %trunc
 }
 
-define <2 x double> @constrained_vector_trunc_v2f64() #0 {
-; CHECK-LABEL: constrained_vector_trunc_v2f64:
+define <2 x double> @constrained_vector_trunc_v2f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_trunc_v2f64_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
-; CHECK-NEXT:    callq trunc@PLT
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movaps (%rdi), %xmm0
 ; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
 ; CHECK-NEXT:    callq trunc@PLT
-; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    callq trunc@PLT
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    addq $40, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_trunc_v2f64:
+; AVX-LABEL: constrained_vector_trunc_v2f64_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vroundpd $11, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT:    vroundpd $11, (%rdi), %xmm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <2 x double>, ptr %a
   %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
+                                <2 x double> %b,
                                 metadata !"fpexcept.strict") #0
   ret <2 x double> %trunc
 }
 
-define <3 x float> @constrained_vector_trunc_v3f32() #0 {
-; CHECK-LABEL: constrained_vector_trunc_v3f32:
+define <3 x float> @constrained_vector_trunc_v3f32_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_trunc_v3f32_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    subq $56, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    callq truncf@PLT
 ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; CHECK-NEXT:    callq truncf@PLT
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    callq truncf@PLT
-; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $56, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_trunc_v3f32:
+; AVX-LABEL: constrained_vector_trunc_v3f32_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vroundss $11, %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0]
 ; AVX-NEXT:    vroundss $11, %xmm2, %xmm2, %xmm2
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
 ; AVX-NEXT:    retq
 entry:
+  %b = load <3 x float>, ptr %a
   %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              <3 x float> %b,
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %trunc
 }
 
-define <3 x double> @constrained_vector_trunc_v3f64() #0 {
-; CHECK-LABEL: constrained_vector_trunc_v3f64:
+define <3 x double> @constrained_vector_trunc_v3f64_var(ptr %a) #0 {
+; CHECK-LABEL: constrained_vector_trunc_v3f64_var:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0]
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    callq trunc@PLT
 ; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0]
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    callq trunc@PLT
 ; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
 ; CHECK-NEXT:    callq trunc@PLT
 ; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
@@ -6151,20 +6477,21 @@ define <3 x double> @constrained_vector_trunc_v3f64() #0 {
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
 ; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
 ; CHECK-NEXT:    # xmm1 = mem[0],zero
-; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    addq $40, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
-; AVX-LABEL: constrained_vector_trunc_v3f64:
+; AVX-LABEL: constrained_vector_trunc_v3f64_var:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0]
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vroundpd $11, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT:    vroundpd $11, (%rdi), %xmm1
 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
 entry:
+  %b = load <3 x double>, ptr %a
   %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          <3 x double> %b,
                           metadata !"fpexcept.strict") #0
   ret <3 x double> %trunc
 }
@@ -6757,10 +7084,10 @@ define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
 ; CHECK-NEXT:    testq %rdi, %rdi
 ; CHECK-NEXT:    cmovnsq %rdi, %rcx
 ; CHECK-NEXT:    cvtsi2sd %rcx, %xmm0
-; CHECK-NEXT:    jns .LBB169_2
+; CHECK-NEXT:    jns .LBB175_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    addsd %xmm0, %xmm0
-; CHECK-NEXT:  .LBB169_2: # %entry
+; CHECK-NEXT:  .LBB175_2: # %entry
 ; CHECK-NEXT:    retq
 ;
 ; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i64:
@@ -6773,10 +7100,10 @@ define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
 ; AVX1-NEXT:    testq %rdi, %rdi
 ; AVX1-NEXT:    cmovnsq %rdi, %rcx
 ; AVX1-NEXT:    vcvtsi2sd %rcx, %xmm0, %xmm0
-; AVX1-NEXT:    jns .LBB169_2
+; AVX1-NEXT:    jns .LBB175_2
 ; AVX1-NEXT:  # %bb.1:
 ; AVX1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:  .LBB169_2: # %entry
+; AVX1-NEXT:  .LBB175_2: # %entry
 ; AVX1-NEXT:    retq
 ;
 ; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i64:
@@ -6802,10 +7129,10 @@ define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
 ; CHECK-NEXT:    testq %rdi, %rdi
 ; CHECK-NEXT:    cmovnsq %rdi, %rcx
 ; CHECK-NEXT:    cvtsi2ss %rcx, %xmm0
-; CHECK-NEXT:    jns .LBB170_2
+; CHECK-NEXT:    jns .LBB176_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    addss %xmm0, %xmm0
-; CHECK-NEXT:  .LBB170_2: # %entry
+; CHECK-NEXT:  .LBB176_2: # %entry
 ; CHECK-NEXT:    retq
 ;
 ; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64:
@@ -6818,10 +7145,10 @@ define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
 ; AVX1-NEXT:    testq %rdi, %rdi
 ; AVX1-NEXT:    cmovnsq %rdi, %rcx
 ; AVX1-NEXT:    vcvtsi2ss %rcx, %xmm0, %xmm0
-; AVX1-NEXT:    jns .LBB170_2
+; AVX1-NEXT:    jns .LBB176_2
 ; AVX1-NEXT:  # %bb.1:
 ; AVX1-NEXT:    vaddss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:  .LBB170_2: # %entry
+; AVX1-NEXT:  .LBB176_2: # %entry
 ; AVX1-NEXT:    retq
 ;
 ; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64:
@@ -6920,10 +7247,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    cvtsi2sd %rdx, %xmm0
-; CHECK-NEXT:    jns .LBB173_2
+; CHECK-NEXT:    jns .LBB179_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    addsd %xmm0, %xmm0
-; CHECK-NEXT:  .LBB173_2: # %entry
+; CHECK-NEXT:  .LBB179_2: # %entry
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
 ; CHECK-NEXT:    movq %xmm1, %rax
 ; CHECK-NEXT:    movq %rax, %rcx
@@ -6935,10 +7262,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    xorps %xmm1, %xmm1
 ; CHECK-NEXT:    cvtsi2sd %rdx, %xmm1
-; CHECK-NEXT:    jns .LBB173_4
+; CHECK-NEXT:    jns .LBB179_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    addsd %xmm1, %xmm1
-; CHECK-NEXT:  .LBB173_4: # %entry
+; CHECK-NEXT:  .LBB179_4: # %entry
 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
 ;
@@ -6953,10 +7280,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
 ; AVX1-NEXT:    testq %rax, %rax
 ; AVX1-NEXT:    cmovnsq %rax, %rdx
 ; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm1, %xmm1
-; AVX1-NEXT:    jns .LBB173_2
+; AVX1-NEXT:    jns .LBB179_2
 ; AVX1-NEXT:  # %bb.1:
 ; AVX1-NEXT:    vaddsd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:  .LBB173_2: # %entry
+; AVX1-NEXT:  .LBB179_2: # %entry
 ; AVX1-NEXT:    vmovq %xmm0, %rax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shrq %rcx
@@ -6966,10 +7293,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
 ; AVX1-NEXT:    testq %rax, %rax
 ; AVX1-NEXT:    cmovnsq %rax, %rdx
 ; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm2, %xmm0
-; AVX1-NEXT:    jns .LBB173_4
+; AVX1-NEXT:    jns .LBB179_4
 ; AVX1-NEXT:  # %bb.3:
 ; AVX1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:  .LBB173_4: # %entry
+; AVX1-NEXT:  .LBB179_4: # %entry
 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT:    retq
 ;
@@ -7011,10 +7338,10 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    cvtsi2ss %rdx, %xmm0
-; CHECK-NEXT:    jns .LBB174_2
+; CHECK-NEXT:    jns .LBB180_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    addss %xmm0, %xmm0
-; CHECK-NEXT:  .LBB174_2: # %entry
+; CHECK-NEXT:  .LBB180_2: # %entry
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
 ; CHECK-NEXT:    movq %xmm1, %rax
 ; CHECK-NEXT:    movq %rax, %rcx
@@ -7026,10 +7353,10 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    xorps %xmm1, %xmm1
 ; CHECK-NEXT:    cvtsi2ss %rdx, %xmm1
-; CHECK-NEXT:    jns .LBB174_4
+; CHECK-NEXT:    jns .LBB180_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    addss %xmm1, %xmm1
-; CHECK-NEXT:  .LBB174_4: # %entry
+; CHECK-NEXT:  .LBB180_4: # %entry
 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; CHECK-NEXT:    retq
 ;
@@ -7177,10 +7504,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
 ; CHECK-NEXT:    testq %rdi, %rdi
 ; CHECK-NEXT:    cmovnsq %rdi, %rcx
 ; CHECK-NEXT:    cvtsi2sd %rcx, %xmm0
-; CHECK-NEXT:    jns .LBB177_2
+; CHECK-NEXT:    jns .LBB183_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    addsd %xmm0, %xmm0
-; CHECK-NEXT:  .LBB177_2: # %entry
+; CHECK-NEXT:  .LBB183_2: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    shrq %rax
 ; CHECK-NEXT:    movl %esi, %ecx
@@ -7189,10 +7516,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    cmovnsq %rsi, %rcx
 ; CHECK-NEXT:    cvtsi2sd %rcx, %xmm1
-; CHECK-NEXT:    jns .LBB177_4
+; CHECK-NEXT:    jns .LBB183_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    addsd %xmm1, %xmm1
-; CHECK-NEXT:  .LBB177_4: # %entry
+; CHECK-NEXT:  .LBB183_4: # %entry
 ; CHECK-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    shrq %rax
 ; CHECK-NEXT:    movl %edx, %ecx
@@ -7201,10 +7528,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
 ; CHECK-NEXT:    testq %rdx, %rdx
 ; CHECK-NEXT:    cmovnsq %rdx, %rcx
 ; CHECK-NEXT:    cvtsi2sd %rcx, %xmm2
-; CHECK-NEXT:    jns .LBB177_6
+; CHECK-NEXT:    jns .LBB183_6
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    addsd %xmm2, %xmm2
-; CHECK-NEXT:  .LBB177_6: # %entry
+; CHECK-NEXT:  .LBB183_6: # %entry
 ; CHECK-NEXT:    movsd %xmm2, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    wait
@@ -7221,10 +7548,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
 ; AVX1-NEXT:    testq %rax, %rax
 ; AVX1-NEXT:    cmovnsq %rax, %rdx
 ; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm1, %xmm1
-; AVX1-NEXT:    jns .LBB177_2
+; AVX1-NEXT:    jns .LBB183_2
 ; AVX1-NEXT:  # %bb.1:
 ; AVX1-NEXT:    vaddsd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:  .LBB177_2: # %entry
+; AVX1-NEXT:  .LBB183_2: # %entry
 ; AVX1-NEXT:    vmovq %xmm0, %rax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shrq %rcx
@@ -7234,10 +7561,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
 ; AVX1-NEXT:    testq %rax, %rax
 ; AVX1-NEXT:    cmovnsq %rax, %rdx
 ; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm2, %xmm2
-; AVX1-NEXT:    jns .LBB177_4
+; AVX1-NEXT:    jns .LBB183_4
 ; AVX1-NEXT:  # %bb.3:
 ; AVX1-NEXT:    vaddsd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:  .LBB177_4: # %entry
+; AVX1-NEXT:  .LBB183_4: # %entry
 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovq %xmm0, %rax
@@ -7249,10 +7576,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
 ; AVX1-NEXT:    testq %rax, %rax
 ; AVX1-NEXT:    cmovnsq %rax, %rdx
 ; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm3, %xmm0
-; AVX1-NEXT:    jns .LBB177_6
+; AVX1-NEXT:    jns .LBB183_6
 ; AVX1-NEXT:  # %bb.5:
 ; AVX1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:  .LBB177_6: # %entry
+; AVX1-NEXT:  .LBB183_6: # %entry
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -7287,10 +7614,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    cmovnsq %rsi, %rcx
 ; CHECK-NEXT:    cvtsi2ss %rcx, %xmm1
-; CHECK-NEXT:    jns .LBB178_2
+; CHECK-NEXT:    jns .LBB184_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    addss %xmm1, %xmm1
-; CHECK-NEXT:  .LBB178_2: # %entry
+; CHECK-NEXT:  .LBB184_2: # %entry
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    shrq %rax
 ; CHECK-NEXT:    movl %edi, %ecx
@@ -7299,10 +7626,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; CHECK-NEXT:    testq %rdi, %rdi
 ; CHECK-NEXT:    cmovnsq %rdi, %rcx
 ; CHECK-NEXT:    cvtsi2ss %rcx, %xmm0
-; CHECK-NEXT:    jns .LBB178_4
+; CHECK-NEXT:    jns .LBB184_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    addss %xmm0, %xmm0
-; CHECK-NEXT:  .LBB178_4: # %entry
+; CHECK-NEXT:  .LBB184_4: # %entry
 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; CHECK-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    shrq %rax
@@ -7313,10 +7640,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; CHECK-NEXT:    cmovnsq %rdx, %rcx
 ; CHECK-NEXT:    xorps %xmm1, %xmm1
 ; CHECK-NEXT:    cvtsi2ss %rcx, %xmm1
-; CHECK-NEXT:    jns .LBB178_6
+; CHECK-NEXT:    jns .LBB184_6
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    addss %xmm1, %xmm1
-; CHECK-NEXT:  .LBB178_6: # %entry
+; CHECK-NEXT:  .LBB184_6: # %entry
 ; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
 ;
@@ -7331,10 +7658,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; AVX1-NEXT:    testq %rax, %rax
 ; AVX1-NEXT:    cmovnsq %rax, %rdx
 ; AVX1-NEXT:    vcvtsi2ss %rdx, %xmm1, %xmm1
-; AVX1-NEXT:    jns .LBB178_2
+; AVX1-NEXT:    jns .LBB184_2
 ; AVX1-NEXT:  # %bb.1:
 ; AVX1-NEXT:    vaddss %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:  .LBB178_2: # %entry
+; AVX1-NEXT:  .LBB184_2: # %entry
 ; AVX1-NEXT:    vmovq %xmm0, %rax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shrq %rcx
@@ -7344,10 +7671,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; AVX1-NEXT:    testq %rax, %rax
 ; AVX1-NEXT:    cmovnsq %rax, %rdx
 ; AVX1-NEXT:    vcvtsi2ss %rdx, %xmm2, %xmm2
-; AVX1-NEXT:    jns .LBB178_4
+; AVX1-NEXT:    jns .LBB184_4
 ; AVX1-NEXT:  # %bb.3:
 ; AVX1-NEXT:    vaddss %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:  .LBB178_4: # %entry
+; AVX1-NEXT:  .LBB184_4: # %entry
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovq %xmm0, %rax
@@ -7359,10 +7686,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; AVX1-NEXT:    testq %rax, %rax
 ; AVX1-NEXT:    cmovnsq %rax, %rdx
 ; AVX1-NEXT:    vcvtsi2ss %rdx, %xmm3, %xmm0
-; AVX1-NEXT:    jns .LBB178_6
+; AVX1-NEXT:    jns .LBB184_6
 ; AVX1-NEXT:  # %bb.5:
 ; AVX1-NEXT:    vaddss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:  .LBB178_6: # %entry
+; AVX1-NEXT:  .LBB184_6: # %entry
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -7477,10 +7804,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    cvtsi2sd %rdx, %xmm0
-; CHECK-NEXT:    jns .LBB181_2
+; CHECK-NEXT:    jns .LBB187_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    addsd %xmm0, %xmm0
-; CHECK-NEXT:  .LBB181_2: # %entry
+; CHECK-NEXT:  .LBB187_2: # %entry
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
 ; CHECK-NEXT:    movq %xmm2, %rax
 ; CHECK-NEXT:    movq %rax, %rcx
@@ -7491,10 +7818,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
 ; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    cvtsi2sd %rdx, %xmm3
-; CHECK-NEXT:    jns .LBB181_4
+; CHECK-NEXT:    jns .LBB187_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    addsd %xmm3, %xmm3
-; CHECK-NEXT:  .LBB181_4: # %entry
+; CHECK-NEXT:  .LBB187_4: # %entry
 ; CHECK-NEXT:    movq %xmm1, %rax
 ; CHECK-NEXT:    movq %rax, %rcx
 ; CHECK-NEXT:    shrq %rcx
@@ -7505,10 +7832,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    xorps %xmm2, %xmm2
 ; CHECK-NEXT:    cvtsi2sd %rdx, %xmm2
-; CHECK-NEXT:    jns .LBB181_6
+; CHECK-NEXT:    jns .LBB187_6
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    addsd %xmm2, %xmm2
-; CHECK-NEXT:  .LBB181_6: # %entry
+; CHECK-NEXT:  .LBB187_6: # %entry
 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
 ; CHECK-NEXT:    movq %xmm1, %rax
@@ -7521,10 +7848,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    xorps %xmm1, %xmm1
 ; CHECK-NEXT:    cvtsi2sd %rdx, %xmm1
-; CHECK-NEXT:    jns .LBB181_8
+; CHECK-NEXT:    jns .LBB187_8
 ; CHECK-NEXT:  # %bb.7:
 ; CHECK-NEXT:    addsd %xmm1, %xmm1
-; CHECK-NEXT:  .LBB181_8: # %entry
+; CHECK-NEXT:  .LBB187_8: # %entry
 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; CHECK-NEXT:    movapd %xmm2, %xmm1
 ; CHECK-NEXT:    retq
@@ -7601,10 +7928,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
 ; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    cvtsi2ss %rdx, %xmm2
-; CHECK-NEXT:    jns .LBB182_2
+; CHECK-NEXT:    jns .LBB188_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    addss %xmm2, %xmm2
-; CHECK-NEXT:  .LBB182_2: # %entry
+; CHECK-NEXT:  .LBB188_2: # %entry
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
 ; CHECK-NEXT:    movq %xmm1, %rax
 ; CHECK-NEXT:    movq %rax, %rcx
@@ -7615,10 +7942,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
 ; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    cvtsi2ss %rdx, %xmm3
-; CHECK-NEXT:    jns .LBB182_4
+; CHECK-NEXT:    jns .LBB188_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    addss %xmm3, %xmm3
-; CHECK-NEXT:  .LBB182_4: # %entry
+; CHECK-NEXT:  .LBB188_4: # %entry
 ; CHECK-NEXT:    movq %xmm0, %rax
 ; CHECK-NEXT:    movq %rax, %rcx
 ; CHECK-NEXT:    shrq %rcx
@@ -7629,10 +7956,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    xorps %xmm1, %xmm1
 ; CHECK-NEXT:    cvtsi2ss %rdx, %xmm1
-; CHECK-NEXT:    jns .LBB182_6
+; CHECK-NEXT:    jns .LBB188_6
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    addss %xmm1, %xmm1
-; CHECK-NEXT:  .LBB182_6: # %entry
+; CHECK-NEXT:  .LBB188_6: # %entry
 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; CHECK-NEXT:    movq %xmm0, %rax
@@ -7645,10 +7972,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
 ; CHECK-NEXT:    cmovnsq %rax, %rdx
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    cvtsi2ss %rdx, %xmm0
-; CHECK-NEXT:    jns .LBB182_8
+; CHECK-NEXT:    jns .LBB188_8
 ; CHECK-NEXT:  # %bb.7:
 ; CHECK-NEXT:    addss %xmm0, %xmm0
-; CHECK-NEXT:  .LBB182_8: # %entry
+; CHECK-NEXT:  .LBB188_8: # %entry
 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; CHECK-NEXT:    movaps %xmm1, %xmm0