[llvm] [DAG] Enable bitcast STLF for Constant/Undef (PR #172523)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 29 07:34:51 PST 2026
https://github.com/Michael-Chen-NJU updated https://github.com/llvm/llvm-project/pull/172523
>From fdb2856d75bb1898f33d3b5ba451b455da9848ce Mon Sep 17 00:00:00 2001
From: Michael-Chen-NJU <2802328816 at qq.com>
Date: Wed, 17 Dec 2025 01:23:35 +0800
Subject: [PATCH 1/7] [DAGCombiner] Fix crash and enable bitcast forwarding in
ForwardStoreValueToDirectLoad
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 +++++-
...ing-mode-fixed-length-insert-vector-elt.ll | 7 +--
.../sve-streaming-mode-fixed-length-stores.ll | 8 +---
llvm/test/CodeGen/AArch64/v3f-to-int.ll | 3 +-
llvm/test/CodeGen/X86/dag-stlf-mismatch.ll | 43 +++++++++++++++++++
llvm/test/CodeGen/X86/pr38533.ll | 9 ++--
6 files changed, 63 insertions(+), 21 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6a99d4e29b64f..111218ef2a01a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20478,8 +20478,18 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
break;
if (STMemType != LDMemType) {
// TODO: Support vectors? This requires extract_subvector/bitcast.
- if (!STMemType.isVector() && !LDMemType.isVector() &&
- STMemType.isInteger() && LDMemType.isInteger())
+ if (LdMemSize == StMemSize) {
+ bool IsConstantOrUndef = isa<ConstantSDNode>(Val) ||
+ isa<ConstantFPSDNode>(Val) ||
+ isa<ConstantPoolSDNode>(Val) || Val.isUndef();
+
+ if (IsConstantOrUndef && isTypeLegal(Val.getValueType()) &&
+ TLI.isOperationLegalOrCustom(ISD::BITCAST, LDMemType))
+ Val = DAG.getBitcast(LDMemType, Val);
+ else
+ break;
+ } else if (!STMemType.isVector() && !LDMemType.isVector() &&
+ STMemType.isInteger() && LDMemType.isInteger())
Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
else
break;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index ad00e99b704dd..5f21c80c2fdd0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -426,8 +426,7 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%r = insertelement <1 x i64> %op1, i64 5, i64 0
@@ -760,9 +759,7 @@ define <1 x double> @insertelement_v1f64(<1 x double> %op1) {
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT: mov x8, #4617315517961601024 // =0x4014000000000000
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, #5.00000000
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%r = insertelement <1 x double> %op1, double 5.0, i64 0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
index 17579d79896da..d2e9fd79e50a9 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
@@ -314,9 +314,7 @@ define void @store_v1i64(ptr %a) {
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT: str xzr, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
-; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: str xzr, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
store <1 x i64> zeroinitializer, ptr %a
@@ -334,9 +332,7 @@ define void @store_v1f64(ptr %a) {
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT: str xzr, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
-; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: str xzr, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
store <1 x double> zeroinitializer, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/v3f-to-int.ll b/llvm/test/CodeGen/AArch64/v3f-to-int.ll
index 6d4061fb02cff..2e4041f60049b 100644
--- a/llvm/test/CodeGen/AArch64/v3f-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/v3f-to-int.ll
@@ -6,8 +6,7 @@ define void @convert_v3f32() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: str wzr, [sp, #12]
-; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: strb wzr, [x8]
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: str h0, [x8]
diff --git a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
new file mode 100644
index 0000000000000..7d65ff77494cf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define float @test_stlf_integer(ptr %p, float %v) {
+; CHECK-LABEL: test_stlf_integer:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl $0, (%rdi)
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: retq
+ store i32 0, ptr %p, align 4
+ %f = load float, ptr %p, align 4
+ %r = fmul fast float %f, %v
+ ret float %r
+}
+
+define float @test_stlf_vector(ptr %p, float %v) {
+; CHECK-LABEL: test_stlf_vector:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: movups %xmm1, (%rdi)
+; CHECK-NEXT: mulss (%rdi), %xmm0
+; CHECK-NEXT: retq
+ store <4 x float> zeroinitializer, ptr %p, align 4
+ %f = load float, ptr %p, align 4
+ %r = fmul fast float %f, %v
+ ret float %r
+}
+
+define float @test_stlf_bitcast(ptr %p, float %v) {
+; CHECK-LABEL: test_stlf_bitcast:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: movups %xmm1, (%rdi)
+; CHECK-NEXT: mulss (%rdi), %xmm0
+; CHECK-NEXT: retq
+ store <2 x i64> zeroinitializer, ptr %p, align 4
+ %f = load float, ptr %p, align 4
+ %r = fmul fast float %f, %v
+ ret float %r
+}
diff --git a/llvm/test/CodeGen/X86/pr38533.ll b/llvm/test/CodeGen/X86/pr38533.ll
index f1bbb2ffdffd0..11db6bfa99207 100644
--- a/llvm/test/CodeGen/X86/pr38533.ll
+++ b/llvm/test/CodeGen/X86/pr38533.ll
@@ -7,23 +7,20 @@
define void @constant_fold_vector_to_half() {
; SSE2-LABEL: constant_fold_vector_to_half:
; SSE2: # %bb.0:
-; SSE2-NEXT: movw $16384, -{{[0-9]+}}(%rsp) # imm = 0x4000
-; SSE2-NEXT: pinsrw $0, -{{[0-9]+}}(%rsp), %xmm0
+; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pextrw $0, %xmm0, %eax
; SSE2-NEXT: movw %ax, (%rax)
; SSE2-NEXT: retq
;
; AVX512-LABEL: constant_fold_vector_to_half:
; AVX512: # %bb.0:
-; AVX512-NEXT: movw $16384, -{{[0-9]+}}(%rsp) # imm = 0x4000
-; AVX512-NEXT: vpinsrw $0, -{{[0-9]+}}(%rsp), %xmm0, %xmm0
+; AVX512-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpextrw $0, %xmm0, (%rax)
; AVX512-NEXT: retq
;
; AVX512FP16-LABEL: constant_fold_vector_to_half:
; AVX512FP16: # %bb.0:
-; AVX512FP16-NEXT: movw $16384, -{{[0-9]+}}(%rsp) # imm = 0x4000
-; AVX512FP16-NEXT: vmovsh -{{[0-9]+}}(%rsp), %xmm0
+; AVX512FP16-NEXT: vmovsh {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX512FP16-NEXT: vmovsh %xmm0, (%rax)
; AVX512FP16-NEXT: retq
store volatile half bitcast (<4 x i4> <i4 0, i4 0, i4 0, i4 4> to half), ptr undef
>From 5152170ab0895b2651125f3833789bd86787dae5 Mon Sep 17 00:00:00 2001
From: Michael-Chen-NJU <2802328816 at qq.com>
Date: Wed, 17 Dec 2025 11:45:43 +0800
Subject: [PATCH 2/7] [DAG] uses standard DAG.isConstant* helpers
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++---
llvm/test/CodeGen/X86/dag-stlf-mismatch.ll | 22 ++++++++++++++++---
2 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 111218ef2a01a..984a01a61e9a0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20479,9 +20479,9 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (STMemType != LDMemType) {
// TODO: Support vectors? This requires extract_subvector/bitcast.
if (LdMemSize == StMemSize) {
- bool IsConstantOrUndef = isa<ConstantSDNode>(Val) ||
- isa<ConstantFPSDNode>(Val) ||
- isa<ConstantPoolSDNode>(Val) || Val.isUndef();
+ bool IsConstantOrUndef =
+ Val.isUndef() || DAG.isConstantIntBuildVectorOrConstantInt(Val) ||
+ DAG.isConstantFPBuildVectorOrConstantFP(Val);
if (IsConstantOrUndef && isTypeLegal(Val.getValueType()) &&
TLI.isOperationLegalOrCustom(ISD::BITCAST, LDMemType))
diff --git a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
index 7d65ff77494cf..f37cb20c04a07 100644
--- a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
+++ b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
@@ -1,8 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
+%struct.Data = type { float }
define float @test_stlf_integer(ptr %p, float %v) {
; CHECK-LABEL: test_stlf_integer:
@@ -38,6 +37,23 @@ define float @test_stlf_bitcast(ptr %p, float %v) {
; CHECK-NEXT: retq
store <2 x i64> zeroinitializer, ptr %p, align 4
%f = load float, ptr %p, align 4
- %r = fmul fast float %f, %v
+ %r = fmul float %f, %v
ret float %r
}
+
+declare void @ext_func(ptr byval(%struct.Data) align 4 %p)
+define void @test_stlf_late_byval(ptr %ptr) {
+; CHECK-LABEL: test_stlf_late_byval:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movl $0, (%rdi)
+; CHECK-NEXT: movl $0, (%rsp)
+; CHECK-NEXT: callq ext_func@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ store i32 0, ptr %ptr, align 4
+ call void @ext_func(ptr byval(%struct.Data) align 4 %ptr)
+ ret void
+}
\ No newline at end of file
>From c3abf924422ed0013408a4b8abbeeea35510c927 Mon Sep 17 00:00:00 2001
From: Michael-Chen-NJU <2802328816 at qq.com>
Date: Thu, 25 Dec 2025 21:13:39 +0800
Subject: [PATCH 3/7] fix
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +------
llvm/test/CodeGen/X86/dag-stlf-mismatch.ll | 2 +-
2 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 984a01a61e9a0..647acd809bdb0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20479,12 +20479,7 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (STMemType != LDMemType) {
// TODO: Support vectors? This requires extract_subvector/bitcast.
if (LdMemSize == StMemSize) {
- bool IsConstantOrUndef =
- Val.isUndef() || DAG.isConstantIntBuildVectorOrConstantInt(Val) ||
- DAG.isConstantFPBuildVectorOrConstantFP(Val);
-
- if (IsConstantOrUndef && isTypeLegal(Val.getValueType()) &&
- TLI.isOperationLegalOrCustom(ISD::BITCAST, LDMemType))
+ if (DAG.isConstantValueOfAnyType(Val) && isTypeLegal(STMemType))
Val = DAG.getBitcast(LDMemType, Val);
else
break;
diff --git a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
index f37cb20c04a07..82550d675d930 100644
--- a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
+++ b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
@@ -56,4 +56,4 @@ define void @test_stlf_late_byval(ptr %ptr) {
store i32 0, ptr %ptr, align 4
call void @ext_func(ptr byval(%struct.Data) align 4 %ptr)
ret void
-}
\ No newline at end of file
+}
>From 0da547bbe442ef1dbcd04472c603d04361b44e45 Mon Sep 17 00:00:00 2001
From: Michael-Chen-NJU <2802328816 at qq.com>
Date: Sat, 27 Dec 2025 21:43:06 +0800
Subject: [PATCH 4/7] [SelectionDAG] Support STLF for vector loads from larger
vector stores
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 17 +++++++++++++++++
.../X86/avx512-shuffles/shuffle-chained-bf16.ll | 5 +----
2 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 647acd809bdb0..fa45a249c40c0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20483,6 +20483,23 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
Val = DAG.getBitcast(LDMemType, Val);
else
break;
+ } else if (LDMemType.isVector()) {
+ EVT EltVT = LDMemType.getVectorElementType();
+ uint64_t EltSize = EltVT.getFixedSizeInBits();
+ uint64_t StSize = StMemSize.getFixedValue();
+
+ if (StSize % EltSize != 0)
+ break;
+
+ EVT InterVT =
+ EVT::getVectorVT(*DAG.getContext(), EltVT, StSize / EltSize);
+ if (!isTypeLegal(InterVT) ||
+ !TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, InterVT))
+ break;
+
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(LD), LDMemType,
+ DAG.getBitcast(InterVT, Val),
+ DAG.getVectorIdxConstant(0, SDLoc(LD)));
} else if (!STMemType.isVector() && !LDMemType.isVector() &&
STMemType.isInteger() && LDMemType.isInteger())
Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll
index 12ce721b8c5d5..c7200850af699 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll
@@ -37,11 +37,8 @@ define <2 x bfloat> @shuffle_chained_v16bf16(<16 x bfloat> %a) {
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $96, %rsp
-; CHECK-NEXT: vmovaps %ymm0, (%rsp)
-; CHECK-NEXT: vmovdqa (%rsp), %xmm0
+; CHECK-NEXT: vmovdqa %ymm0, (%rsp)
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
-; CHECK-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
>From 1e9185a19b7db86148c301552e2f78b044e2a86c Mon Sep 17 00:00:00 2001
From: Michael-Chen-NJU <2802328816 at qq.com>
Date: Sun, 11 Jan 2026 14:54:52 +0800
Subject: [PATCH 5/7] use getExtractSubvector
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ++---
llvm/test/CodeGen/X86/dag-stlf-mismatch.ll | 11 +++++------
2 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fa45a249c40c0..2265acccce0b9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20497,9 +20497,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
!TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, InterVT))
break;
- Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(LD), LDMemType,
- DAG.getBitcast(InterVT, Val),
- DAG.getVectorIdxConstant(0, SDLoc(LD)));
+ Val = DAG.getExtractSubvector(SDLoc(LD), LDMemType,
+ DAG.getBitcast(InterVT, Val), 0);
} else if (!STMemType.isVector() && !LDMemType.isVector() &&
STMemType.isInteger() && LDMemType.isInteger())
Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
diff --git a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
index 82550d675d930..5d830710021eb 100644
--- a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
+++ b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
@@ -7,11 +7,12 @@ define float @test_stlf_integer(ptr %p, float %v) {
; CHECK-LABEL: test_stlf_integer:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $0, (%rdi)
-; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: mulss %xmm1, %xmm0
; CHECK-NEXT: retq
store i32 0, ptr %p, align 4
%f = load float, ptr %p, align 4
- %r = fmul fast float %f, %v
+ %r = fmul float %f, %v
ret float %r
}
@@ -24,7 +25,7 @@ define float @test_stlf_vector(ptr %p, float %v) {
; CHECK-NEXT: retq
store <4 x float> zeroinitializer, ptr %p, align 4
%f = load float, ptr %p, align 4
- %r = fmul fast float %f, %v
+ %r = fmul float %f, %v
ret float %r
}
@@ -42,16 +43,14 @@ define float @test_stlf_bitcast(ptr %p, float %v) {
}
declare void @ext_func(ptr byval(%struct.Data) align 4 %p)
-define void @test_stlf_late_byval(ptr %ptr) {
+define void @test_stlf_late_byval(ptr %ptr) nounwind {
; CHECK-LABEL: test_stlf_late_byval:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: movl $0, (%rsp)
; CHECK-NEXT: callq ext_func@PLT
; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
store i32 0, ptr %ptr, align 4
call void @ext_func(ptr byval(%struct.Data) align 4 %ptr)
>From 4968e3f5b84fe5cedad4aa579cd8a344d36e21f9 Mon Sep 17 00:00:00 2001
From: Michael-Chen-NJU <2802328816 at qq.com>
Date: Thu, 29 Jan 2026 21:37:33 +0800
Subject: [PATCH 6/7] [DAGCombine] Extend ForwardStoreValueToDirectLoad to
non-constant values
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 +++--
.../CodeGen/AArch64/arm64ec-entry-thunks.ll | 2 +-
.../CodeGen/AArch64/arm64ec-exit-thunks.ll | 2 +-
.../CodeGen/AArch64/sve-forward-st-to-ld.ll | 1 -
...treaming-mode-fixed-length-bit-counting.ll | 9 ++----
...streaming-mode-fixed-length-fp-compares.ll | 3 +-
...aming-mode-fixed-length-fp-extend-trunc.ll | 2 --
.../sve-streaming-mode-fixed-length-fp-fma.ll | 2 --
...e-streaming-mode-fixed-length-fp-minmax.ll | 8 -----
...streaming-mode-fixed-length-fp-rounding.ll | 14 ---------
...e-streaming-mode-fixed-length-fp-select.ll | 2 --
...e-streaming-mode-fixed-length-fp-to-int.ll | 12 +++-----
...-streaming-mode-fixed-length-fp-vselect.ll | 2 --
...e-streaming-mode-fixed-length-int-arith.ll | 12 +++-----
...treaming-mode-fixed-length-int-compares.ll | 3 +-
...sve-streaming-mode-fixed-length-int-div.ll | 6 ++--
...sve-streaming-mode-fixed-length-int-log.ll | 9 ++----
...-streaming-mode-fixed-length-int-minmax.ll | 12 +++-----
...ve-streaming-mode-fixed-length-int-mulh.ll | 30 +++++--------------
...sve-streaming-mode-fixed-length-int-rem.ll | 6 ++--
...-streaming-mode-fixed-length-int-select.ll | 3 +-
...-streaming-mode-fixed-length-int-shifts.ll | 9 ++----
...e-streaming-mode-fixed-length-int-to-fp.ll | 3 +-
...streaming-mode-fixed-length-int-vselect.ll | 3 +-
...-streaming-mode-fixed-length-ld2-alloca.ll | 4 +--
.../sve-streaming-mode-fixed-length-rev.ll | 3 +-
...e-streaming-mode-fixed-length-sdiv-pow2.ll | 3 +-
...treaming-mode-fixed-length-splat-vector.ll | 5 +---
...treaming-mode-fixed-length-trunc-stores.ll | 9 +++---
llvm/test/CodeGen/AArch64/v3f-to-int.ll | 4 +--
llvm/test/CodeGen/PowerPC/vsx-p9.ll | 4 +--
.../CodeGen/X86/atomic-non-integer-fp128.ll | 2 --
llvm/test/CodeGen/X86/pr30290.ll | 5 ++--
llvm/test/CodeGen/X86/vectorcall.ll | 8 ++---
34 files changed, 63 insertions(+), 147 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ccdd326fe5536..ef6dd71d3d839 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20611,9 +20611,13 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (!isTypeLegal(LDMemType))
break;
if (STMemType != LDMemType) {
- // TODO: Support vectors? This requires extract_subvector/bitcast.
if (LdMemSize == StMemSize) {
- if (DAG.isConstantValueOfAnyType(Val) && isTypeLegal(STMemType))
+ if (TLI.isLoadBitCastBeneficial(LDMemType, STMemType, DAG,
+ *LD->getMemOperand()) &&
+ TLI.isOperationLegal(ISD::BITCAST, LDMemType) &&
+ isTypeLegal(LDMemType) &&
+ TLI.isOperationLegal(ISD::BITCAST, STMemType) &&
+ isTypeLegal(STMemType))
Val = DAG.getBitcast(LDMemType, Val);
else
break;
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 35ffc99f7a405..2c1b735ffe28c 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -508,8 +508,8 @@ define <4 x i8> @small_vector(<4 x i8> %0) {
; CHECK-NEXT: add x29, sp, #176
; CHECK-NEXT: .seh_add_fp 176
; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: str w0, [sp, #12]
-; CHECK-NEXT: ldr s0, [sp, #12]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: blr x9
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
index dc352244deeef..6fba6a3974574 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
@@ -477,8 +477,8 @@ declare <4 x i8> @small_vector(<4 x i8> %0) nounwind;
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: stur s0, [x29, #-4]
; CHECK-NEXT: blr x16
+; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: stur w8, [x29, #-8]
-; CHECK-NEXT: ldur s0, [x29, #-8]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: .seh_startepilogue
diff --git a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
index 8620c9a34b5d6..8fe8873ec3e0a 100644
--- a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
+++ b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
@@ -62,7 +62,6 @@ define <vscale x 4 x i32> @sti64ldi32(ptr nocapture %P, <vscale x 2 x i64> %v) {
; CHECK-LABEL: sti64ldi32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str z0, [x0, #1, mul vl]
-; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
; CHECK-NEXT: ret
entry:
%0 = bitcast ptr %P to ptr
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index bd49db8a4c414..ea5eb0be5dc7f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -657,8 +657,7 @@ define <1 x i64> @ctlz_v1i64(<1 x i64> %op) {
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fmov x8, d0
; NONEON-NOSVE-NEXT: clz x8, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op)
@@ -2347,8 +2346,7 @@ define <1 x i64> @ctpop_v1i64(<1 x i64> %op) {
; NONEON-NOSVE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
; NONEON-NOSVE-NEXT: mul x8, x9, x8
; NONEON-NOSVE-NEXT: lsr x8, x8, #56
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %op)
@@ -3247,8 +3245,7 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) {
; NONEON-NOSVE-NEXT: fmov x8, d0
; NONEON-NOSVE-NEXT: rbit x8, x8
; NONEON-NOSVE-NEXT: clz x8, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
index a8220d9993227..19f8298819ea5 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -467,8 +467,7 @@ define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) {
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fcmp d0, d1
; NONEON-NOSVE-NEXT: csetm x8, eq
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%cmp = fcmp oeq <1 x double> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index 41a6cdc7df4dd..fec4c5994cc19 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -420,8 +420,6 @@ define void @fcvt_v1f16_v1f64(ptr %a, ptr %b) {
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr h0, [x0]
; NONEON-NOSVE-NEXT: fcvt d0, h0
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: str d0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
index fddd5df323e46..3f6050c079f9b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
@@ -591,8 +591,6 @@ define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fmadd d0, d0, d1, d2
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%mul = fmul contract <1 x double> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
index e53d6a9081154..4a6e5489b91d4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
@@ -404,8 +404,6 @@ define <1 x double> @fmaxnm_v1f64(<1 x double> %op1, <1 x double> %op2) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fmaxnm d0, d0, d1
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.maxnum.v1f64(<1 x double> %op1, <1 x double> %op2)
@@ -880,8 +878,6 @@ define <1 x double> @fminnm_v1f64(<1 x double> %op1, <1 x double> %op2) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fminnm d0, d0, d1
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.minnum.v1f64(<1 x double> %op1, <1 x double> %op2)
@@ -1356,8 +1352,6 @@ define <1 x double> @fmax_v1f64(<1 x double> %op1, <1 x double> %op2) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fmax d0, d0, d1
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.maximum.v1f64(<1 x double> %op1, <1 x double> %op2)
@@ -1832,8 +1826,6 @@ define <1 x double> @fmin_v1f64(<1 x double> %op1, <1 x double> %op2) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fmin d0, d0, d1
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.minimum.v1f64(<1 x double> %op1, <1 x double> %op2)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
index 03bc39a6ef3ee..c7beda8fc45de 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
@@ -365,8 +365,6 @@ define <1 x double> @frintp_v1f64(<1 x double> %op) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: frintp d0, d0
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
@@ -793,8 +791,6 @@ define <1 x double> @frintm_v1f64(<1 x double> %op) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: frintm d0, d0
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
@@ -1221,8 +1217,6 @@ define <1 x double> @frinti_v1f64(<1 x double> %op) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: frinti d0, d0
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
@@ -1649,8 +1643,6 @@ define <1 x double> @frintx_v1f64(<1 x double> %op) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: frintx d0, d0
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
@@ -2077,8 +2069,6 @@ define <1 x double> @frinta_v1f64(<1 x double> %op) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: frinta d0, d0
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
@@ -2505,8 +2495,6 @@ define <1 x double> @frintn_v1f64(<1 x double> %op) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: frintn d0, d0
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
@@ -2933,8 +2921,6 @@ define <1 x double> @frintz_v1f64(<1 x double> %op) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: frintz d0, d0
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index bcc446d9d1a41..a3b1a0eb4c3b0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -394,8 +394,6 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: tst w0, #0x1
; NONEON-NOSVE-NEXT: fcsel d0, d0, d1, ne
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index 21a26921ab031..76aa8e45ccda3 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -433,8 +433,7 @@ define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) {
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvtzu x8, s0
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = fptoui <1 x half> %op1 to <1 x i64>
@@ -1639,8 +1638,7 @@ define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fcvtzu x8, d0
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i64>
@@ -2133,8 +2131,7 @@ define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) {
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvtzs x8, s0
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = fptosi <1 x half> %op1 to <1 x i64>
@@ -3342,8 +3339,7 @@ define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) {
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fcvtzs x8, d0
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i64>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index c43e929f47848..d7d5d2a1ea01d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -502,8 +502,6 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: tst w0, #0x1
; NONEON-NOSVE-NEXT: fcsel d0, d0, d1, ne
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
index 02b5469c0ff85..daf637f6e6003 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
@@ -685,8 +685,7 @@ define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: add x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = add <1 x i64> %op1, %op2
@@ -1546,8 +1545,7 @@ define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: mul x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = mul <1 x i64> %op1, %op2
@@ -2320,8 +2318,7 @@ define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: sub x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = sub <1 x i64> %op1, %op2
@@ -3063,8 +3060,7 @@ define <1 x i64> @abs_v1i64(<1 x i64> %op1) {
; NONEON-NOSVE-NEXT: fmov x8, d0
; NONEON-NOSVE-NEXT: cmp x8, #0
; NONEON-NOSVE-NEXT: cneg x8, x8, mi
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.abs.v1i64(<1 x i64> %op1, i1 false)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
index ba20de65a253a..370dbd301196f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
@@ -744,8 +744,7 @@ define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: cmp x9, x8
; NONEON-NOSVE-NEXT: csetm x8, eq
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%cmp = icmp eq <1 x i64> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 8e1d61b51e2bb..aa8cc958d6886 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -843,8 +843,7 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: sdiv x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = sdiv <1 x i64> %op1, %op2
@@ -1755,8 +1754,7 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: udiv x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = udiv <1 x i64> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
index 687dd9445f387..c8996bc0dadb1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
@@ -620,8 +620,7 @@ define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: and x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = and <1 x i64> %op1, %op2
@@ -1313,8 +1312,7 @@ define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: orr x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = or <1 x i64> %op1, %op2
@@ -2006,8 +2004,7 @@ define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: eor x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = xor <1 x i64> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
index d858d8171926e..2471478ba0fde 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
@@ -723,8 +723,7 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: cmp x9, x8
; NONEON-NOSVE-NEXT: csel x8, x9, x8, gt
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
@@ -1525,8 +1524,7 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: cmp x9, x8
; NONEON-NOSVE-NEXT: csel x8, x9, x8, lt
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
@@ -2327,8 +2325,7 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: cmp x9, x8
; NONEON-NOSVE-NEXT: csel x8, x9, x8, hi
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
@@ -3129,8 +3126,7 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: cmp x9, x8
; NONEON-NOSVE-NEXT: csel x8, x9, x8, lo
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
index f0d7f7df3db70..c23750f4bb844 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
@@ -1097,8 +1097,7 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d0
; NONEON-NOSVE-NEXT: fmov x9, d1
; NONEON-NOSVE-NEXT: smulh x8, x8, x9
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%1 = sext <1 x i64> %op1 to <1 x i128>
@@ -1135,9 +1134,7 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; NONEON-NOSVE-NEXT: ldp x11, x10, [sp, #16]
; NONEON-NOSVE-NEXT: smulh x8, x8, x10
; NONEON-NOSVE-NEXT: smulh x9, x9, x11
-; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #48]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
@@ -1185,12 +1182,8 @@ define void @smulh_v4i64(ptr %a, ptr %b) {
; NONEON-NOSVE-NEXT: smulh x11, x11, x13
; NONEON-NOSVE-NEXT: smulh x8, x8, x12
; NONEON-NOSVE-NEXT: smulh x9, x9, x14
-; NONEON-NOSVE-NEXT: stp x11, x10, [sp, #64]
-; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #80]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
+; NONEON-NOSVE-NEXT: stp x11, x10, [sp, #96]
+; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #112]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #128
@@ -2292,8 +2285,7 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d0
; NONEON-NOSVE-NEXT: fmov x9, d1
; NONEON-NOSVE-NEXT: umulh x8, x8, x9
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%1 = zext <1 x i64> %op1 to <1 x i128>
@@ -2330,9 +2322,7 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; NONEON-NOSVE-NEXT: ldp x11, x10, [sp, #16]
; NONEON-NOSVE-NEXT: umulh x8, x8, x10
; NONEON-NOSVE-NEXT: umulh x9, x9, x11
-; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #48]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
@@ -2380,12 +2370,8 @@ define void @umulh_v4i64(ptr %a, ptr %b) {
; NONEON-NOSVE-NEXT: umulh x11, x11, x13
; NONEON-NOSVE-NEXT: umulh x8, x8, x12
; NONEON-NOSVE-NEXT: umulh x9, x9, x14
-; NONEON-NOSVE-NEXT: stp x11, x10, [sp, #64]
-; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #80]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
+; NONEON-NOSVE-NEXT: stp x11, x10, [sp, #96]
+; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #112]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #128
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index c4b6c0e6e924c..4c6805c7030ca 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -961,8 +961,7 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: sdiv x10, x9, x8
; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = srem <1 x i64> %op1, %op2
@@ -2009,8 +2008,7 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: udiv x10, x9, x8
; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = urem <1 x i64> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
index 4ac156c42fda0..95978db65a9f4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
@@ -746,8 +746,7 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) {
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: tst w0, #0x1
; NONEON-NOSVE-NEXT: csel x8, x9, x8, ne
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
index 4cf8945575ded..bfcdbbe5cad3f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
@@ -694,8 +694,7 @@ define <1 x i64> @ashr_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: asr x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = ashr <1 x i64> %op1, %op2
@@ -1460,8 +1459,7 @@ define <1 x i64> @lshr_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: lsr x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = lshr <1 x i64> %op1, %op2
@@ -2224,8 +2222,7 @@ define <1 x i64> @shl_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: lsl x8, x9, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = shl <1 x i64> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index 4379194b306dc..1901f24ef8167 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -404,8 +404,7 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: str d0, [sp]
-; NONEON-NOSVE-NEXT: ldr d0, [sp], #16
+; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = uitofp <1 x i16> %op1 to <1 x double>
ret <1 x double> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index e4763f0bb9ba9..edffb455e215e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -986,8 +986,7 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) {
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: tst w0, #0x1
; NONEON-NOSVE-NEXT: csel x8, x9, x8, ne
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index 302942178a764..b6343c7ead699 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -81,9 +81,7 @@ define void @alloc_v6i8(ptr %st_ptr) nounwind {
; NONEON-NOSVE-NEXT: mov x19, x0
; NONEON-NOSVE-NEXT: add x0, sp, #24
; NONEON-NOSVE-NEXT: bl def
-; NONEON-NOSVE-NEXT: ldr x8, [sp, #24]
-; NONEON-NOSVE-NEXT: str x8, [sp]
-; NONEON-NOSVE-NEXT: ldr d0, [sp]
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
; NONEON-NOSVE-NEXT: strb w8, [sp, #21]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index c34cae12516ed..5dd1caf875b2e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -656,8 +656,7 @@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) {
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fmov x8, d0
; NONEON-NOSVE-NEXT: rbit x8, x8
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
index 85ba964000234..d50782d06b488 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
@@ -865,8 +865,7 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) {
; NONEON-NOSVE-NEXT: asr x9, x8, #63
; NONEON-NOSVE-NEXT: add x8, x8, x9, lsr #59
; NONEON-NOSVE-NEXT: asr x8, x8, #5
-; NONEON-NOSVE-NEXT: str x8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x8
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 32, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
index 335a1f88ad3cf..d7b08e6fbd270 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
@@ -302,8 +302,7 @@ define <1 x i64> @splat_v1i64(i64 %a) {
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT: str x0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: fmov d0, x0
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%insert = insertelement <1 x i64> poison, i64 %a, i64 0
@@ -532,8 +531,6 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) {
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%insert = insertelement <1 x double> poison, double %a, i64 0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
index 13fcd94ea8a26..144f2cee886c9 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
@@ -138,12 +138,11 @@ define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) {
;
; NONEON-NOSVE-LABEL: store_trunc_v2i256i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: ldr x8, [x0, #32]
-; NONEON-NOSVE-NEXT: ldr x9, [x0]
-; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #-32]!
+; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT: ldr d0, [x0, #32]
+; NONEON-NOSVE-NEXT: ldr d1, [x0]
+; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: str q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #32
diff --git a/llvm/test/CodeGen/AArch64/v3f-to-int.ll b/llvm/test/CodeGen/AArch64/v3f-to-int.ll
index 2e4041f60049b..25fa4c35bcff8 100644
--- a/llvm/test/CodeGen/AArch64/v3f-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/v3f-to-int.ll
@@ -6,10 +6,8 @@ define void @convert_v3f32() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: strb wzr, [x8]
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: str h0, [x8]
+; CHECK-NEXT: strh wzr, [x8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/PowerPC/vsx-p9.ll b/llvm/test/CodeGen/PowerPC/vsx-p9.ll
index b7598c6de4c10..0a62a284aa45f 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-p9.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-p9.ll
@@ -137,8 +137,8 @@ entry:
tail call void (...) @sink(<2 x double> %add.i12)
; CHECK: lxv 0, 0(3)
; CHECK: lxv 1, 0(3)
-; CHECK: xvadddp 0, 0, 1
-; CHECK: stxv 0,
+; CHECK: xvadddp {{[0-9]+}}, 0, 1
+; CHECK: stxv {{[0-9]+}},
; CHECK: bl sink
ret void
}
diff --git a/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll b/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
index e8b4d3f6812d2..e7bbd91d98fab 100644
--- a/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
+++ b/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
@@ -55,8 +55,6 @@ define fp128 @load_fp128(ptr %fptr) {
; X64-AVX-LABEL: load_fp128:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
-; X64-AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; X64-AVX-NEXT: retq
%v = load atomic fp128, ptr %fptr unordered, align 16
ret fp128 %v
diff --git a/llvm/test/CodeGen/X86/pr30290.ll b/llvm/test/CodeGen/X86/pr30290.ll
index 74e553191331f..1cf0947e03ec6 100644
--- a/llvm/test/CodeGen/X86/pr30290.ll
+++ b/llvm/test/CodeGen/X86/pr30290.ll
@@ -23,9 +23,8 @@ define void @foo(ptr byval(%struct.face) nocapture align 8) local_unnamed_addr {
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT: vmovups %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT: vmovups %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %xmm0, (%rsp)
; CHECK-NEXT: callq bar at PLT
; CHECK-NEXT: addq $40, %rsp
diff --git a/llvm/test/CodeGen/X86/vectorcall.ll b/llvm/test/CodeGen/X86/vectorcall.ll
index 07446c6a7bfa4..9a7d002fc3178 100644
--- a/llvm/test/CodeGen/X86/vectorcall.ll
+++ b/llvm/test/CodeGen/X86/vectorcall.ll
@@ -152,10 +152,10 @@ entry:
define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, ptr %b) {
; CHECK-LABEL: test_mixed_6
-; CHECK: movaps (%{{[re]}}sp), %xmm0
-; CHECK: movaps 16(%{{[re]}}sp), %xmm1
-; CHECK: movaps 32(%{{[re]}}sp), %xmm2
-; CHECK: movaps 48(%{{[re]}}sp), %xmm3
+; CHECK-DAG: movaps (%{{.*}}), %xmm0
+; CHECK-DAG: movaps 16(%{{.*}}), %xmm1
+; CHECK-DAG: movaps 32(%{{.*}}), %xmm2
+; CHECK-DAG: movaps 48(%{{.*}}), %xmm3
; CHECK: ret{{[ql]}}
entry:
%retval = alloca %struct.HVA4, align 16
>From 0c367bb1ae522e53b6606a98d1e0236f6c52618a Mon Sep 17 00:00:00 2001
From: Michael-Chen-NJU <2802328816 at qq.com>
Date: Thu, 29 Jan 2026 23:23:53 +0800
Subject: [PATCH 7/7] [SelectionDAG] Update ForwardStoreValueToDirectLoad to handle known multiples of element size
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 +++++-------
llvm/test/CodeGen/AArch64/sve-stlf.ll | 12 ++++++++++++
llvm/test/CodeGen/RISCV/rvv/stlf.ll | 12 ++++++++++++
llvm/test/CodeGen/X86/dag-stlf-mismatch.ll | 13 +++++++++++++
4 files changed, 42 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sve-stlf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/stlf.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ef6dd71d3d839..b304b5e249c3b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20623,16 +20623,14 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
break;
} else if (LDMemType.isVector()) {
EVT EltVT = LDMemType.getVectorElementType();
- uint64_t EltSize = EltVT.getFixedSizeInBits();
- uint64_t StSize = StMemSize.getFixedValue();
+ uint64_t EltSize = EltVT.getSizeInBits();
- if (StSize % EltSize != 0)
+ if (!StMemSize.isKnownMultipleOf(EltSize))
break;
- EVT InterVT =
- EVT::getVectorVT(*DAG.getContext(), EltVT, StSize / EltSize);
- if (!isTypeLegal(InterVT) ||
- !TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, InterVT))
+ EVT InterVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ StMemSize.divideCoefficientBy(EltSize));
+ if (!TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, InterVT))
break;
Val = DAG.getExtractSubvector(SDLoc(LD), LDMemType,
diff --git a/llvm/test/CodeGen/AArch64/sve-stlf.ll b/llvm/test/CodeGen/AArch64/sve-stlf.ll
new file mode 100644
index 0000000000000..7403da8298312
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-stlf.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
+
+define <vscale x 4 x i32> @test_stlf_scalable(ptr %p, <vscale x 4 x i32> %v) {
+; CHECK-LABEL: test_stlf_scalable:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str z0, [x0]
+; CHECK-NEXT: ret
+ store <vscale x 4 x i32> %v, ptr %p
+ %res = load <vscale x 4 x i32>, ptr %p
+ ret <vscale x 4 x i32> %res
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/stlf.ll b/llvm/test/CodeGen/RISCV/rvv/stlf.ll
new file mode 100644
index 0000000000000..171271a9b8967
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/stlf.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+define <vscale x 4 x i32> @test_stlf_riscv_scalable(ptr %p, <vscale x 4 x i32> %v) {
+; CHECK-LABEL: test_stlf_riscv_scalable:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vs2r.v v8, (a0)
+; CHECK-NEXT: ret
+ store <vscale x 4 x i32> %v, ptr %p
+ %res = load <vscale x 4 x i32>, ptr %p
+ ret <vscale x 4 x i32> %res
+}
diff --git a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
index 5d830710021eb..a1ee713b32032 100644
--- a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
+++ b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
@@ -56,3 +56,16 @@ define void @test_stlf_late_byval(ptr %ptr) nounwind {
call void @ext_func(ptr byval(%struct.Data) align 4 %ptr)
ret void
}
+
+define float @test_stlf_variable(ptr %p, i32 %val, float %v) {
+; CHECK-LABEL: test_stlf_variable:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movd %esi, %xmm1
+; CHECK-NEXT: movl %esi, (%rdi)
+; CHECK-NEXT: mulss %xmm1, %xmm0
+; CHECK-NEXT: retq
+ store i32 %val, ptr %p, align 4
+ %f = load float, ptr %p, align 4
+ %r = fmul float %f, %v
+ ret float %r
+}
More information about the llvm-commits mailing list