[llvm] [DAG] Don't split f64 constant stores if the fp imm is legal (PR #74622)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 6 08:30:46 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
If the target can generate a specific fp immediate constant (i.e. `TLI.isFPImmLegal` returns true for it), then don't split the f64 constant store into 2 x i32 stores.
Another cleanup step for #<!-- -->74304
---
Full diff: https://github.com/llvm/llvm-project/pull/74622.diff
11 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+3-2)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+2-1)
- (modified) llvm/test/CodeGen/ARM/aapcs-hfa-code.ll (+4-8)
- (modified) llvm/test/CodeGen/ARM/ha-alignstack-call.ll (+10-10)
- (modified) llvm/test/CodeGen/Mips/pr49200.ll (+4-5)
- (modified) llvm/test/CodeGen/X86/fp-intrinsics.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/ldexp.ll (+4-3)
- (modified) llvm/test/CodeGen/X86/memset64-on-x86-32.ll (+11-20)
- (modified) llvm/test/CodeGen/X86/pr38738.ll (+9-16)
- (modified) llvm/test/CodeGen/X86/slow-unaligned-mem.ll (+54-40)
- (modified) llvm/test/CodeGen/X86/zero-remat.ll (+4-3)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2a3425a42607e..0c5b2894a2e76 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20911,7 +20911,8 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
// transform should not be done in this case.
SDValue Tmp;
- switch (CFP->getSimpleValueType(0).SimpleTy) {
+ MVT SimpleVT = CFP->getSimpleValueType(0);
+ switch (SimpleVT.SimpleTy) {
default:
llvm_unreachable("Unknown FP type");
case MVT::f16: // We don't do this for these yet.
@@ -20940,7 +20941,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
Ptr, ST->getMemOperand());
}
- if (ST->isSimple() &&
+ if (ST->isSimple() && !TLI.isFPImmLegal(CFP->getValueAPF(), SimpleVT) &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7a54141fa711a..5e1f9fbcdde0a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -461,7 +461,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
ST->getOriginalAlign(), MMOFlags, AAInfo);
}
- if (CFP->getValueType(0) == MVT::f64) {
+ if (CFP->getValueType(0) == MVT::f64 &&
+ !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (TLI.isTypeLegal(MVT::i64)) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
diff --git a/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll b/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
index e32f19ef67452..dabbb1e38a86b 100644
--- a/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
+++ b/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
@@ -104,10 +104,8 @@ define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, sp, #8
-; CHECK-NEXT: movw r1, #0
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: movt r1, #16368
-; CHECK-NEXT: strd r0, r1, [sp]
+; CHECK-NEXT: vmov.f64 d16, #1.000000e+00
+; CHECK-NEXT: vstr d16, [sp]
; CHECK-NEXT: bl test_1double_nosplit
; CHECK-NEXT: add sp, sp, #8
; CHECK-NEXT: pop {r11, pc}
@@ -138,10 +136,8 @@ define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double],
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: movw r1, #0
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: movt r1, #16368
-; CHECK-NEXT: strd r0, r1, [sp, #8]
+; CHECK-NEXT: vmov.f64 d16, #1.000000e+00
+; CHECK-NEXT: vstr d16, [sp, #8]
; CHECK-NEXT: bl test_1double_misaligned
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r11, pc}
diff --git a/llvm/test/CodeGen/ARM/ha-alignstack-call.ll b/llvm/test/CodeGen/ARM/ha-alignstack-call.ll
index e861fe397f849..7e2a911c89281 100644
--- a/llvm/test/CodeGen/ARM/ha-alignstack-call.ll
+++ b/llvm/test/CodeGen/ARM/ha-alignstack-call.ll
@@ -300,16 +300,16 @@ entry:
ret double %call
}
; CHECK-LABEL: g2_1_call:
-; CHECK: movw r0, #0
-; CHECK: mov r1, #0
-; CHECK: movt r0, #16352
-; CHECK: str r1, [sp]
-; CHECK: stmib sp, {r0, r1}
-; CHECK: str r1, [sp, #12]
-; CHECK: str r1, [sp, #16]
-; CHECK: str r1, [sp, #20]
-; CHECK: str r1, [sp, #24]
-; CHECK: str r1, [sp, #28]
+; CHECK: vmov.f64 d16, #5.000000e-01
+; CHECK: mov r0, #0
+; CHECK: str r0, [sp, #8]
+; CHECK: str r0, [sp, #12]
+; CHECK: str r0, [sp, #16]
+; CHECK: vmov.i32 d0, #0x0
+; CHECK: str r0, [sp, #20]
+; CHECK: str r0, [sp, #24]
+; CHECK: str r0, [sp, #28]
+; CHECK: vstr d16, [sp]
; CHECK: bl g2_1
; pass in memory, alignment 8
diff --git a/llvm/test/CodeGen/Mips/pr49200.ll b/llvm/test/CodeGen/Mips/pr49200.ll
index 80a2bdd4e95ee..2a9f207b29e58 100644
--- a/llvm/test/CodeGen/Mips/pr49200.ll
+++ b/llvm/test/CodeGen/Mips/pr49200.ll
@@ -11,11 +11,10 @@ define dso_local void @foo() #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addiusp -24
-; CHECK-NEXT: li16 $2, 0
-; CHECK-NEXT: sw $2, 4($sp)
-; CHECK-NEXT: sw $2, 0($sp)
-; CHECK-NEXT: sw $2, 12($sp)
-; CHECK-NEXT: sw $2, 8($sp)
+; CHECK-NEXT: mtc1 $zero, $f0
+; CHECK-NEXT: mthc1 $zero, $f0
+; CHECK-NEXT: sdc1 $f0, 0($sp)
+; CHECK-NEXT: sdc1 $f0, 8($sp)
; CHECK-NEXT: ldc1 $f0, 0($sp)
; CHECK-NEXT: sdc1 $f0, 16($sp)
; CHECK-NEXT: addiusp 24
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index 32e45adcb94d7..5f77e2cb46cbf 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -865,9 +865,9 @@ define double @f19() #0 {
; X87-NEXT: .cfi_def_cfa_offset 32
; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
+; X87-NEXT: fld1
+; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
-; X87-NEXT: movl $1072693248, {{[0-9]+}}(%esp) # imm = 0x3FF00000
-; X87-NEXT: movl $0, (%esp)
; X87-NEXT: calll fmod
; X87-NEXT: addl $28, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
diff --git a/llvm/test/CodeGen/X86/ldexp.ll b/llvm/test/CodeGen/X86/ldexp.ll
index 44c57c54ba023..ec128fc6686c8 100644
--- a/llvm/test/CodeGen/X86/ldexp.ll
+++ b/llvm/test/CodeGen/X86/ldexp.ll
@@ -91,10 +91,11 @@ define double @ldexp_f64(i8 zeroext %x) {
;
; WIN32-LABEL: ldexp_f64:
; WIN32: # %bb.0:
+; WIN32-NEXT: subl $12, %esp
; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: pushl %eax
-; WIN32-NEXT: pushl $1072693248 # imm = 0x3FF00000
-; WIN32-NEXT: pushl $0
+; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN32-NEXT: fld1
+; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _ldexp
; WIN32-NEXT: addl $12, %esp
; WIN32-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/memset64-on-x86-32.ll b/llvm/test/CodeGen/X86/memset64-on-x86-32.ll
index c6eecdcdf99cc..480a0970bd39d 100644
--- a/llvm/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/llvm/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -18,26 +18,17 @@ define void @bork(ptr nocapture align 4 %dst) nounwind {
; SLOW_32-LABEL: bork:
; SLOW_32: # %bb.0:
; SLOW_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SLOW_32-NEXT: movl $0, 4(%eax)
-; SLOW_32-NEXT: movl $0, (%eax)
-; SLOW_32-NEXT: movl $0, 12(%eax)
-; SLOW_32-NEXT: movl $0, 8(%eax)
-; SLOW_32-NEXT: movl $0, 20(%eax)
-; SLOW_32-NEXT: movl $0, 16(%eax)
-; SLOW_32-NEXT: movl $0, 28(%eax)
-; SLOW_32-NEXT: movl $0, 24(%eax)
-; SLOW_32-NEXT: movl $0, 36(%eax)
-; SLOW_32-NEXT: movl $0, 32(%eax)
-; SLOW_32-NEXT: movl $0, 44(%eax)
-; SLOW_32-NEXT: movl $0, 40(%eax)
-; SLOW_32-NEXT: movl $0, 52(%eax)
-; SLOW_32-NEXT: movl $0, 48(%eax)
-; SLOW_32-NEXT: movl $0, 60(%eax)
-; SLOW_32-NEXT: movl $0, 56(%eax)
-; SLOW_32-NEXT: movl $0, 68(%eax)
-; SLOW_32-NEXT: movl $0, 64(%eax)
-; SLOW_32-NEXT: movl $0, 76(%eax)
-; SLOW_32-NEXT: movl $0, 72(%eax)
+; SLOW_32-NEXT: xorps %xmm0, %xmm0
+; SLOW_32-NEXT: movsd %xmm0, 72(%eax)
+; SLOW_32-NEXT: movsd %xmm0, 64(%eax)
+; SLOW_32-NEXT: movsd %xmm0, 56(%eax)
+; SLOW_32-NEXT: movsd %xmm0, 48(%eax)
+; SLOW_32-NEXT: movsd %xmm0, 40(%eax)
+; SLOW_32-NEXT: movsd %xmm0, 32(%eax)
+; SLOW_32-NEXT: movsd %xmm0, 24(%eax)
+; SLOW_32-NEXT: movsd %xmm0, 16(%eax)
+; SLOW_32-NEXT: movsd %xmm0, 8(%eax)
+; SLOW_32-NEXT: movsd %xmm0, (%eax)
; SLOW_32-NEXT: retl
;
; SLOW_64-LABEL: bork:
diff --git a/llvm/test/CodeGen/X86/pr38738.ll b/llvm/test/CodeGen/X86/pr38738.ll
index 753b7ce33d2be..205849e7d05db 100644
--- a/llvm/test/CodeGen/X86/pr38738.ll
+++ b/llvm/test/CodeGen/X86/pr38738.ll
@@ -130,22 +130,15 @@ define void @tryset(ptr nocapture %x) {
; X86SSE2-LABEL: tryset:
; X86SSE2: # %bb.0:
; X86SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86SSE2-NEXT: movl $0, 4(%eax)
-; X86SSE2-NEXT: movl $0, (%eax)
-; X86SSE2-NEXT: movl $0, 12(%eax)
-; X86SSE2-NEXT: movl $0, 8(%eax)
-; X86SSE2-NEXT: movl $0, 20(%eax)
-; X86SSE2-NEXT: movl $0, 16(%eax)
-; X86SSE2-NEXT: movl $0, 28(%eax)
-; X86SSE2-NEXT: movl $0, 24(%eax)
-; X86SSE2-NEXT: movl $0, 36(%eax)
-; X86SSE2-NEXT: movl $0, 32(%eax)
-; X86SSE2-NEXT: movl $0, 44(%eax)
-; X86SSE2-NEXT: movl $0, 40(%eax)
-; X86SSE2-NEXT: movl $0, 52(%eax)
-; X86SSE2-NEXT: movl $0, 48(%eax)
-; X86SSE2-NEXT: movl $0, 60(%eax)
-; X86SSE2-NEXT: movl $0, 56(%eax)
+; X86SSE2-NEXT: xorps %xmm0, %xmm0
+; X86SSE2-NEXT: movsd %xmm0, 56(%eax)
+; X86SSE2-NEXT: movsd %xmm0, 48(%eax)
+; X86SSE2-NEXT: movsd %xmm0, 40(%eax)
+; X86SSE2-NEXT: movsd %xmm0, 32(%eax)
+; X86SSE2-NEXT: movsd %xmm0, 24(%eax)
+; X86SSE2-NEXT: movsd %xmm0, 16(%eax)
+; X86SSE2-NEXT: movsd %xmm0, 8(%eax)
+; X86SSE2-NEXT: movsd %xmm0, (%eax)
; X86SSE2-NEXT: retl
;
; X64AVX-LABEL: tryset:
diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
index 85afa83e3f08e..d74d195439bda 100644
--- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -1,16 +1,16 @@
; Intel chips with slow unaligned memory accesses
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefixes=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; Intel chips with fast unaligned memory accesses
@@ -26,15 +26,15 @@
; AMD chips with slow unaligned memory accesses
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; AMD chips with fast unaligned memory accesses
@@ -67,26 +67,40 @@
; SLOW-NOT: not a recognized processor
; FAST-NOT: not a recognized processor
define void @store_zeros(ptr %a) {
-; SLOW-LABEL: store_zeros:
-; SLOW: # %bb.0:
-; SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NEXT: movl $0
-; SLOW-NOT: movl
+; SLOW-SCALAR-LABEL: store_zeros:
+; SLOW-SCALAR: # %bb.0:
+; SLOW-SCALAR-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NEXT: movl $0
+; SLOW-SCALAR-NOT: movl
+;
+; SLOW-SSE-LABEL: store_zeros:
+; SLOW-SSE: # %bb.0:
+; SLOW-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SLOW-SSE-NEXT: xorps %xmm0, %xmm0
+; SLOW-SSE-NEXT: movsd %xmm0
+; SLOW-SSE-NEXT: movsd %xmm0
+; SLOW-SSE-NEXT: movsd %xmm0
+; SLOW-SSE-NEXT: movsd %xmm0
+; SLOW-SSE-NEXT: movsd %xmm0
+; SLOW-SSE-NEXT: movsd %xmm0
+; SLOW-SSE-NEXT: movsd %xmm0
+; SLOW-SSE-NEXT: movsd %xmm0
+; SLOW-SSE-NOT: movsd
;
; FAST-SSE-LABEL: store_zeros:
; FAST-SSE: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/zero-remat.ll b/llvm/test/CodeGen/X86/zero-remat.ll
index 60bb2c420cda4..000e0d14b711f 100644
--- a/llvm/test/CodeGen/X86/zero-remat.ll
+++ b/llvm/test/CodeGen/X86/zero-remat.ll
@@ -19,11 +19,12 @@ define double @foo() nounwind {
;
; CHECK-32-LABEL: foo:
; CHECK-32: # %bb.0:
-; CHECK-32-NEXT: pushl $0
-; CHECK-32-NEXT: pushl $0
+; CHECK-32-NEXT: subl $8, %esp
+; CHECK-32-NEXT: fldz
+; CHECK-32-NEXT: fstpl (%esp)
; CHECK-32-NEXT: calll bar@PLT
-; CHECK-32-NEXT: addl $8, %esp
; CHECK-32-NEXT: fldz
+; CHECK-32-NEXT: addl $8, %esp
; CHECK-32-NEXT: retl
call void @bar(double 0.0)
ret double 0.0
``````````
</details>
https://github.com/llvm/llvm-project/pull/74622
More information about the llvm-commits
mailing list