[llvm] [X86] LowerATOMIC_STORE - on 32-bit targets see if i64 values were originally legal f64 values that we can store directly. (PR #171602)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 10 03:48:33 PST 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/171602
Based on feedback from #171478.
From dcd7483b08c1540d587506ed0abf790529484e3e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 10 Dec 2025 11:47:32 +0000
Subject: [PATCH] [X86] LowerATOMIC_STORE - on 32-bit targets see if i64 values
were originally legal f64 values that we can store directly.
Based on feedback from #171478.
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 9 +-
llvm/test/CodeGen/X86/atomic-fp.ll | 668 ++++++++----------------
2 files changed, 212 insertions(+), 465 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fbd875a93fd4a..ebb9119362bc6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33162,7 +33162,14 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
// For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
// is enabled.
if (VT == MVT::i64) {
- if (Subtarget.hasSSE1()) {
+ SDValue BCValue = peekThroughBitcasts(Node->getVal());
+ if (BCValue.getValueType() == MVT::f64 &&
+ (Subtarget.hasX87() || Subtarget.hasSSE2())) {
+ // If the i64 was bitcast from an f64 then we can do the f64 atomic store
+ // directly with FSTPL/MOVSD.
+ Chain = DAG.getStore(Node->getChain(), dl, BCValue, Node->getBasePtr(),
+ Node->getMemOperand());
+ } else if (Subtarget.hasSSE1()) {
SDValue SclToVec =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getVal());
MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll
index fe79dfe39f645..2dee1d12e7255 100644
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -80,23 +80,17 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: faddl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%eax)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: faddl 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%eax)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -109,16 +103,13 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: faddl 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%eax)
+; X86-SSE1-NEXT: fstpl (%eax)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -132,9 +123,7 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%eax)
+; X86-SSE2-NEXT: movsd %xmm0, (%eax)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -148,9 +137,7 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT: vmovsd %xmm0, (%eax)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -246,22 +233,16 @@ define dso_local void @fadd_64g() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: fildll glob64
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fld1
-; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll glob64
+; X86-NOSSE-NEXT: fld1
+; X86-NOSSE-NEXT: faddl (%esp)
+; X86-NOSSE-NEXT: fstpl glob64
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -273,16 +254,13 @@ define dso_local void @fadd_64g() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, glob64
+; X86-SSE1-NEXT: fstpl glob64
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -295,9 +273,7 @@ define dso_local void @fadd_64g() nounwind {
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, glob64
+; X86-SSE2-NEXT: movsd %xmm0, glob64
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -310,9 +286,7 @@ define dso_local void @fadd_64g() nounwind {
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, glob64
+; X86-AVX-NEXT: vmovsd %xmm0, glob64
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -409,22 +383,16 @@ define dso_local void @fadd_64imm() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: fildll -559038737
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fld1
-; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll -559038737
+; X86-NOSSE-NEXT: fld1
+; X86-NOSSE-NEXT: faddl (%esp)
+; X86-NOSSE-NEXT: fstpl -559038737
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -436,16 +404,13 @@ define dso_local void @fadd_64imm() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, -559038737
+; X86-SSE1-NEXT: fstpl -559038737
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -458,9 +423,7 @@ define dso_local void @fadd_64imm() nounwind {
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, -559038737
+; X86-SSE2-NEXT: movsd %xmm0, -559038737
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -473,9 +436,7 @@ define dso_local void @fadd_64imm() nounwind {
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, -559038737
+; X86-AVX-NEXT: vmovsd %xmm0, -559038737
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -577,22 +538,16 @@ define dso_local void @fadd_64stack() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
-; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: faddl (%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -604,16 +559,13 @@ define dso_local void @fadd_64stack() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $24, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -627,8 +579,6 @@ define dso_local void @fadd_64stack() nounwind {
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -642,8 +592,6 @@ define dso_local void @fadd_64stack() nounwind {
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -677,7 +625,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 20(%ebp), %eax
; X86-NOSSE-NEXT: movl 8(%ebp), %ecx
; X86-NOSSE-NEXT: fildll (%ecx,%eax,8)
@@ -685,16 +633,10 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: faddl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %edx, (%esp)
-; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: faddl 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%ecx,%eax,8)
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %ebp
@@ -709,16 +651,13 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE1-NEXT: movl 20(%ebp), %eax
; X86-SSE1-NEXT: movl 8(%ebp), %ecx
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: faddl 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE1-NEXT: fstpl (%ecx,%eax,8)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -733,9 +672,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %ecx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE2-NEXT: movsd %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -750,9 +687,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %ecx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8)
+; X86-AVX-NEXT: vmovsd %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -852,23 +787,17 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fsubl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%eax)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fsubl 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%eax)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -881,16 +810,13 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fsubl 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%eax)
+; X86-SSE1-NEXT: fstpl (%eax)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -904,9 +830,7 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: subsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%eax)
+; X86-SSE2-NEXT: movsd %xmm0, (%eax)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -920,9 +844,7 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vsubsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT: vmovsd %xmm0, (%eax)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -1018,23 +940,17 @@ define dso_local void @fsub_64g() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: fildll glob64
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fchs
-; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll glob64
+; X86-NOSSE-NEXT: faddl (%esp)
+; X86-NOSSE-NEXT: fstpl glob64
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -1046,17 +962,14 @@ define dso_local void @fsub_64g() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: fchs
; X86-SSE1-NEXT: faddl (%esp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, glob64
+; X86-SSE1-NEXT: fstpl glob64
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -1069,9 +982,7 @@ define dso_local void @fsub_64g() nounwind {
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, glob64
+; X86-SSE2-NEXT: movsd %xmm0, glob64
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -1084,9 +995,7 @@ define dso_local void @fsub_64g() nounwind {
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, glob64
+; X86-AVX-NEXT: vmovsd %xmm0, glob64
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -1184,23 +1093,17 @@ define dso_local void @fsub_64imm() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: fildll -559038737
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fchs
-; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll -559038737
+; X86-NOSSE-NEXT: faddl (%esp)
+; X86-NOSSE-NEXT: fstpl -559038737
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -1212,17 +1115,14 @@ define dso_local void @fsub_64imm() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: fchs
; X86-SSE1-NEXT: faddl (%esp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, -559038737
+; X86-SSE1-NEXT: fstpl -559038737
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -1235,9 +1135,7 @@ define dso_local void @fsub_64imm() nounwind {
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, -559038737
+; X86-SSE2-NEXT: movsd %xmm0, -559038737
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -1250,9 +1148,7 @@ define dso_local void @fsub_64imm() nounwind {
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, -559038737
+; X86-AVX-NEXT: vmovsd %xmm0, -559038737
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -1354,22 +1250,16 @@ define dso_local void @fsub_64stack() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
-; X86-NOSSE-NEXT: fsubl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fsubl (%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -1381,16 +1271,13 @@ define dso_local void @fsub_64stack() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $24, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: fsubl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -1405,8 +1292,6 @@ define dso_local void @fsub_64stack() nounwind {
; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; X86-SSE2-NEXT: subsd %xmm0, %xmm1
; X86-SSE2-NEXT: movsd %xmm1, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -1421,8 +1306,6 @@ define dso_local void @fsub_64stack() nounwind {
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; X86-AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -1456,7 +1339,7 @@ define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 20(%ebp), %eax
; X86-NOSSE-NEXT: movl 8(%ebp), %ecx
; X86-NOSSE-NEXT: fildll (%ecx,%eax,8)
@@ -1464,16 +1347,10 @@ define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fsubl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %edx, (%esp)
-; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fsubl 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%ecx,%eax,8)
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %ebp
@@ -1488,16 +1365,13 @@ define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE1-NEXT: movl 20(%ebp), %eax
; X86-SSE1-NEXT: movl 8(%ebp), %ecx
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fsubl 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE1-NEXT: fstpl (%ecx,%eax,8)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -1512,9 +1386,7 @@ define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %ecx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: subsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE2-NEXT: movsd %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -1529,9 +1401,7 @@ define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %ecx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vsubsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8)
+; X86-AVX-NEXT: vmovsd %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -1631,23 +1501,17 @@ define dso_local void @fmul_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fmull 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%eax)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fmull 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%eax)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -1660,16 +1524,13 @@ define dso_local void @fmul_64r(ptr %loc, double %val) nounwind {
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fmull 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%eax)
+; X86-SSE1-NEXT: fstpl (%eax)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -1683,9 +1544,7 @@ define dso_local void @fmul_64r(ptr %loc, double %val) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: mulsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%eax)
+; X86-SSE2-NEXT: movsd %xmm0, (%eax)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -1699,9 +1558,7 @@ define dso_local void @fmul_64r(ptr %loc, double %val) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmulsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT: vmovsd %xmm0, (%eax)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -1794,22 +1651,16 @@ define dso_local void @fmul_64g() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: fildll glob64
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll glob64
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NOSSE-NEXT: fstpl glob64
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -1821,16 +1672,13 @@ define dso_local void @fmul_64g() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, glob64
+; X86-SSE1-NEXT: fstpl glob64
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -1843,9 +1691,7 @@ define dso_local void @fmul_64g() nounwind {
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, glob64
+; X86-SSE2-NEXT: movsd %xmm0, glob64
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -1858,9 +1704,7 @@ define dso_local void @fmul_64g() nounwind {
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, glob64
+; X86-AVX-NEXT: vmovsd %xmm0, glob64
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -1957,22 +1801,16 @@ define dso_local void @fmul_64imm() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: fildll -559038737
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll -559038737
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NOSSE-NEXT: fstpl -559038737
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -1984,16 +1822,13 @@ define dso_local void @fmul_64imm() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, -559038737
+; X86-SSE1-NEXT: fstpl -559038737
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -2006,9 +1841,7 @@ define dso_local void @fmul_64imm() nounwind {
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, -559038737
+; X86-SSE2-NEXT: movsd %xmm0, -559038737
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -2021,9 +1854,7 @@ define dso_local void @fmul_64imm() nounwind {
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, -559038737
+; X86-AVX-NEXT: vmovsd %xmm0, -559038737
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -2125,22 +1956,16 @@ define dso_local void @fmul_64stack() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %eax, (%esp)
+; X86-NOSSE-NEXT: fldl (%esp)
; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}}
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -2152,16 +1977,13 @@ define dso_local void @fmul_64stack() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $24, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}}
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -2175,8 +1997,6 @@ define dso_local void @fmul_64stack() nounwind {
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -2190,8 +2010,6 @@ define dso_local void @fmul_64stack() nounwind {
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -2225,7 +2043,7 @@ define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 20(%ebp), %eax
; X86-NOSSE-NEXT: movl 8(%ebp), %ecx
; X86-NOSSE-NEXT: fildll (%ecx,%eax,8)
@@ -2233,16 +2051,10 @@ define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fmull 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %edx, (%esp)
-; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fmull 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%ecx,%eax,8)
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %ebp
@@ -2257,16 +2069,13 @@ define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE1-NEXT: movl 20(%ebp), %eax
; X86-SSE1-NEXT: movl 8(%ebp), %ecx
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fmull 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE1-NEXT: fstpl (%ecx,%eax,8)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -2281,9 +2090,7 @@ define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %ecx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: mulsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE2-NEXT: movsd %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -2298,9 +2105,7 @@ define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %ecx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmulsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8)
+; X86-AVX-NEXT: vmovsd %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -2400,23 +2205,17 @@ define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fdivl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%eax)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fdivl 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%eax)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -2429,16 +2228,13 @@ define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind {
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fdivl 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%eax)
+; X86-SSE1-NEXT: fstpl (%eax)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -2452,9 +2248,7 @@ define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: divsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%eax)
+; X86-SSE2-NEXT: movsd %xmm0, (%eax)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -2468,9 +2262,7 @@ define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vdivsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT: vmovsd %xmm0, (%eax)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -2565,22 +2357,16 @@ define dso_local void @fdiv_64g() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: fildll glob64
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll glob64
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NOSSE-NEXT: fstpl glob64
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -2592,16 +2378,13 @@ define dso_local void @fdiv_64g() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, glob64
+; X86-SSE1-NEXT: fstpl glob64
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -2614,9 +2397,7 @@ define dso_local void @fdiv_64g() nounwind {
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, glob64
+; X86-SSE2-NEXT: movsd %xmm0, glob64
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -2629,9 +2410,7 @@ define dso_local void @fdiv_64g() nounwind {
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, glob64
+; X86-AVX-NEXT: vmovsd %xmm0, glob64
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -2728,22 +2507,16 @@ define dso_local void @fdiv_64imm() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: fildll -559038737
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll -559038737
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NOSSE-NEXT: fstpl -559038737
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -2755,16 +2528,13 @@ define dso_local void @fdiv_64imm() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, -559038737
+; X86-SSE1-NEXT: fstpl -559038737
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -2777,9 +2547,7 @@ define dso_local void @fdiv_64imm() nounwind {
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, -559038737
+; X86-SSE2-NEXT: movsd %xmm0, -559038737
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -2792,9 +2560,7 @@ define dso_local void @fdiv_64imm() nounwind {
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, -559038737
+; X86-AVX-NEXT: vmovsd %xmm0, -559038737
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -2896,22 +2662,16 @@ define dso_local void @fdiv_64stack() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
-; X86-NOSSE-NEXT: fdivl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fdivl (%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -2923,16 +2683,13 @@ define dso_local void @fdiv_64stack() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $24, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: fdivl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -2947,8 +2704,6 @@ define dso_local void @fdiv_64stack() nounwind {
; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; X86-SSE2-NEXT: divsd %xmm0, %xmm1
; X86-SSE2-NEXT: movsd %xmm1, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -2963,8 +2718,6 @@ define dso_local void @fdiv_64stack() nounwind {
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; X86-AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -2998,7 +2751,7 @@ define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 20(%ebp), %eax
; X86-NOSSE-NEXT: movl 8(%ebp), %ecx
; X86-NOSSE-NEXT: fildll (%ecx,%eax,8)
@@ -3006,16 +2759,10 @@ define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fdivl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %edx, (%esp)
-; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fdivl 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%ecx,%eax,8)
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %ebp
@@ -3030,16 +2777,13 @@ define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE1-NEXT: movl 20(%ebp), %eax
; X86-SSE1-NEXT: movl 8(%ebp), %ecx
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fdivl 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE1-NEXT: fstpl (%ecx,%eax,8)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -3054,9 +2798,7 @@ define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %ecx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: divsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE2-NEXT: movsd %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -3071,9 +2813,7 @@ define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %ecx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vdivsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8)
+; X86-AVX-NEXT: vmovsd %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
More information about the llvm-commits mailing list