[llvm] [X86] LowerATOMIC_STORE - on 32-bit targets see if i64 values were originally legal f64 values that we can store directly. (PR #171602)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 10 03:49:04 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Based on feedback from #171478
---
Patch is 63.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171602.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+8-1)
- (modified) llvm/test/CodeGen/X86/atomic-fp.ll (+204-464)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fbd875a93fd4a..ebb9119362bc6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33162,7 +33162,14 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
// For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
// is enabled.
if (VT == MVT::i64) {
- if (Subtarget.hasSSE1()) {
+ SDValue BCValue = peekThroughBitcasts(Node->getVal());
+ if (BCValue.getValueType() == MVT::f64 &&
+ (Subtarget.hasX87() || Subtarget.hasSSE2())) {
+ // If the i64 was bitcast from a f64 then we can do the f64 atomic store
+ // directly with FSTPL/MOVSD.
+ Chain = DAG.getStore(Node->getChain(), dl, BCValue, Node->getBasePtr(),
+ Node->getMemOperand());
+ } else if (Subtarget.hasSSE1()) {
SDValue SclToVec =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getVal());
MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll
index fe79dfe39f645..2dee1d12e7255 100644
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -80,23 +80,17 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: faddl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%eax)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: faddl 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%eax)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -109,16 +103,13 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: faddl 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%eax)
+; X86-SSE1-NEXT: fstpl (%eax)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -132,9 +123,7 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%eax)
+; X86-SSE2-NEXT: movsd %xmm0, (%eax)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -148,9 +137,7 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT: vmovsd %xmm0, (%eax)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -246,22 +233,16 @@ define dso_local void @fadd_64g() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: fildll glob64
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fld1
-; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll glob64
+; X86-NOSSE-NEXT: fld1
+; X86-NOSSE-NEXT: faddl (%esp)
+; X86-NOSSE-NEXT: fstpl glob64
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -273,16 +254,13 @@ define dso_local void @fadd_64g() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, glob64
+; X86-SSE1-NEXT: fstpl glob64
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -295,9 +273,7 @@ define dso_local void @fadd_64g() nounwind {
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, glob64
+; X86-SSE2-NEXT: movsd %xmm0, glob64
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -310,9 +286,7 @@ define dso_local void @fadd_64g() nounwind {
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, glob64
+; X86-AVX-NEXT: vmovsd %xmm0, glob64
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -409,22 +383,16 @@ define dso_local void @fadd_64imm() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: fildll -559038737
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fld1
-; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll -559038737
+; X86-NOSSE-NEXT: fld1
+; X86-NOSSE-NEXT: faddl (%esp)
+; X86-NOSSE-NEXT: fstpl -559038737
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -436,16 +404,13 @@ define dso_local void @fadd_64imm() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, -559038737
+; X86-SSE1-NEXT: fstpl -559038737
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -458,9 +423,7 @@ define dso_local void @fadd_64imm() nounwind {
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, -559038737
+; X86-SSE2-NEXT: movsd %xmm0, -559038737
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -473,9 +436,7 @@ define dso_local void @fadd_64imm() nounwind {
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, -559038737
+; X86-AVX-NEXT: vmovsd %xmm0, -559038737
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -577,22 +538,16 @@ define dso_local void @fadd_64stack() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
-; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: faddl (%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -604,16 +559,13 @@ define dso_local void @fadd_64stack() nounwind {
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $24, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -627,8 +579,6 @@ define dso_local void @fadd_64stack() nounwind {
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -642,8 +592,6 @@ define dso_local void @fadd_64stack() nounwind {
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -677,7 +625,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 20(%ebp), %eax
; X86-NOSSE-NEXT: movl 8(%ebp), %ecx
; X86-NOSSE-NEXT: fildll (%ecx,%eax,8)
@@ -685,16 +633,10 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: faddl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %edx, (%esp)
-; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: faddl 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%ecx,%eax,8)
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %ebp
@@ -709,16 +651,13 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE1-NEXT: movl 20(%ebp), %eax
; X86-SSE1-NEXT: movl 8(%ebp), %ecx
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: faddl 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE1-NEXT: fstpl (%ecx,%eax,8)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -733,9 +672,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %ecx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE2-NEXT: movsd %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -750,9 +687,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %ecx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8)
+; X86-AVX-NEXT: vmovsd %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -852,23 +787,17 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fsubl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fildll (%esp)
-; X86-NOSSE-NEXT: fistpll (%eax)
+; X86-NOSSE-NEXT: fldl (%esp)
+; X86-NOSSE-NEXT: fsubl 12(%ebp)
+; X86-NOSSE-NEXT: fstpl (%eax)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
@@ -881,16 +810,13 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
-; X86-SSE1-NEXT: xorps %xmm1, %xmm1
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT: movss %xmm1, (%esp)
-; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fsubl 12(%ebp)
-; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT: movlps %xmm0, (%eax)
+; X86-SSE1-NEXT: fstpl (%eax)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
@@ -904,9 +830,7 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: subsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlps %xmm0, (%eax)
+; X86-SSE2-NEXT: movsd %xmm0, (%eax)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
@@ -920,9 +844,7 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
; X86-AVX-NEXT: movl 8(%ebp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vsubsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT: vmovsd %xmm0, (%eax)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
@@ -1018,23 +940,17 @@ define dso_local void @fsub_64g() nounwind {
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/171602
More information about the llvm-commits mailing list