[llvm] [AArch64] Fix incorrect big-endian spill in foldMemoryOperandImpl (PR #65601)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 7 05:41:39 PDT 2023
https://github.com/john-brawn-arm created https://github.com/llvm/llvm-project/pull/65601:
When an s-register sub-register (ssub) of a q register was spilled, AArch64InstrInfo::foldMemoryOperandImpl would emit a spill of a d register. This gives the wrong result on big-endian targets, because the subsequent q-register fill places the value in the top half of the register.
Fix this by greatly simplifying the existing spill-widening code so that it only handles widening wzr to xzr. No other special handling is needed, because when the function returns nullptr the default behaviour is already to emit a widened spill.
>From 9b98136a2e0cc9d9ee18673936a0647cb5a4e18d Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Wed, 6 Sep 2023 14:46:41 +0100
Subject: [PATCH] [AArch64] Fix incorrect big-endian spill in
foldMemoryOperandImpl
When an s-register sub-register (ssub) of a q register was spilled,
AArch64InstrInfo::foldMemoryOperandImpl would emit a spill of a d
register. This gives the wrong result on big-endian targets, because
the subsequent q-register fill places the value in the top half.
Fix this by greatly simplifying the existing spill-widening code so
that it only handles widening wzr to xzr. No other handling is needed,
because when the function returns nullptr the default behaviour is
already to emit a widened spill.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 39 ++------
.../CodeGen/AArch64/arm64-neon-mul-div.ll | 7 +-
llvm/test/CodeGen/AArch64/frem.ll | 89 ++++++++++++-------
llvm/test/CodeGen/AArch64/llvm.exp10.ll | 34 +++++--
llvm/test/CodeGen/AArch64/pow.ll | 4 +-
llvm/test/CodeGen/AArch64/spill-fold.mir | 16 ++++
.../CodeGen/AArch64/sve-fixed-length-fp128.ll | 4 +
llvm/test/CodeGen/AArch64/vec-libcalls.ll | 27 ++++--
8 files changed, 133 insertions(+), 87 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a41ac0e44a7700b..e18bb28c6ae6d06 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4690,42 +4690,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// STRXui %xzr, %stack.0
//
- if (IsSpill && DstMO.isUndef() && SrcReg.isPhysical()) {
+ if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
+ TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
assert(SrcMO.getSubReg() == 0 &&
"Unexpected subreg on physical register");
- const TargetRegisterClass *SpillRC;
- unsigned SpillSubreg;
- switch (DstMO.getSubReg()) {
- default:
- SpillRC = nullptr;
- break;
- case AArch64::sub_32:
- case AArch64::ssub:
- if (AArch64::GPR32RegClass.contains(SrcReg)) {
- SpillRC = &AArch64::GPR64RegClass;
- SpillSubreg = AArch64::sub_32;
- } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
- SpillRC = &AArch64::FPR64RegClass;
- SpillSubreg = AArch64::ssub;
- } else
- SpillRC = nullptr;
- break;
- case AArch64::dsub:
- if (AArch64::FPR64RegClass.contains(SrcReg)) {
- SpillRC = &AArch64::FPR128RegClass;
- SpillSubreg = AArch64::dsub;
- } else
- SpillRC = nullptr;
- break;
- }
-
- if (SpillRC)
- if (unsigned WidenedSrcReg =
- TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
- storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
- FrameIndex, SpillRC, &TRI, Register());
- return &*--InsertPt;
- }
+ storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
+ FrameIndex, &AArch64::GPR64RegClass, &TRI, Register());
+ return &*--InsertPt;
}
// Handle cases like filling use of:
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
index 7039cccdf9393c9..ecf3f69825c0e9e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
@@ -1486,7 +1486,8 @@ define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: mov s1, v1.s[1]
; CHECK-NEXT: bl fmodf
-; CHECK-NEXT: str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1
@@ -1513,7 +1514,8 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: mov s1, v1.s[1]
; CHECK-NEXT: bl fmodf
-; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1
@@ -1569,6 +1571,7 @@ define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
; CHECK-NEXT: mov d0, v0.d[1]
; CHECK-NEXT: mov d1, v1.d[1]
; CHECK-NEXT: bl fmod
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
diff --git a/llvm/test/CodeGen/AArch64/frem.ll b/llvm/test/CodeGen/AArch64/frem.ll
index 90e93577efd9f60..02b785bf9669415 100644
--- a/llvm/test/CodeGen/AArch64/frem.ll
+++ b/llvm/test/CodeGen/AArch64/frem.ll
@@ -68,6 +68,7 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-SD-NEXT: mov d0, v0.d[1]
; CHECK-SD-NEXT: mov d1, v1.d[1]
; CHECK-SD-NEXT: bl fmod
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -94,9 +95,10 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-GI-NEXT: bl fmod
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov d0, d8
; CHECK-GI-NEXT: fmov d1, d9
+; CHECK-GI-NEXT: fmov d0, d8
; CHECK-GI-NEXT: bl fmod
; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -200,6 +202,7 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-SD-NEXT: stp q1, q3, [sp, #48] // 32-byte Folded Spill
; CHECK-SD-NEXT: mov d1, v2.d[1]
; CHECK-SD-NEXT: bl fmod
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -213,6 +216,7 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-SD-NEXT: mov d0, v0.d[1]
; CHECK-SD-NEXT: mov d1, v1.d[1]
; CHECK-SD-NEXT: bl fmod
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -248,19 +252,22 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-GI-NEXT: mov d9, v4.d[1]
; CHECK-GI-NEXT: bl fmod
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov d0, d8
; CHECK-GI-NEXT: fmov d1, d10
+; CHECK-GI-NEXT: fmov d0, d8
; CHECK-GI-NEXT: bl fmod
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: bl fmod
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov d0, d9
; CHECK-GI-NEXT: fmov d1, d11
+; CHECK-GI-NEXT: fmov d0, d9
; CHECK-GI-NEXT: bl fmod
; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
@@ -292,7 +299,8 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) {
; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: mov s1, v1.s[1]
; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1
@@ -321,9 +329,10 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) {
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: fmov s1, s9
+; CHECK-GI-NEXT: fmov s0, s8
; CHECK-GI-NEXT: bl fmodf
; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
@@ -349,7 +358,8 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) {
; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: mov s1, v1.s[1]
; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1
@@ -389,13 +399,15 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) {
; CHECK-GI-NEXT: mov s11, v1.s[2]
; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: fmov s1, s10
+; CHECK-GI-NEXT: fmov s0, s8
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov s0, s9
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s9
; CHECK-GI-NEXT: bl fmodf
; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
@@ -424,7 +436,8 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: mov s1, v1.s[1]
; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1
@@ -477,17 +490,20 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-GI-NEXT: mov s13, v1.s[3]
; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s8
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov s0, s9
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: fmov s1, s12
+; CHECK-GI-NEXT: fmov s0, s9
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov s0, s10
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: fmov s1, s13
+; CHECK-GI-NEXT: fmov s0, s10
; CHECK-GI-NEXT: bl fmodf
; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
@@ -519,7 +535,8 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-SD-NEXT: stp q1, q3, [sp, #32] // 32-byte Folded Spill
; CHECK-SD-NEXT: mov s1, v2.s[1]
; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1
@@ -548,7 +565,8 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: mov s1, v1.s[1]
; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1
@@ -616,32 +634,39 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-GI-NEXT: mov s2, v3.s[3]
; CHECK-GI-NEXT: stp s2, s5, [sp, #200] // 8-byte Folded Spill
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
; CHECK-GI-NEXT: fmov s1, s14
+; CHECK-GI-NEXT: fmov s0, s8
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov s0, s9
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: fmov s1, s15
+; CHECK-GI-NEXT: fmov s0, s9
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT: fmov s0, s10
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT: fmov s1, s13
+; CHECK-GI-NEXT: fmov s0, s10
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1
; CHECK-GI-NEXT: bl fmodf
; CHECK-GI-NEXT: fmov s1, s12
-; CHECK-GI-NEXT: str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr s0, [sp, #64] // 4-byte Folded Reload
; CHECK-GI-NEXT: bl fmodf
; CHECK-GI-NEXT: fmov s1, s11
-; CHECK-GI-NEXT: str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr s0, [sp, #112] // 4-byte Folded Reload
; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: str d0, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldp s1, s0, [sp, #200] // 8-byte Folded Reload
; CHECK-GI-NEXT: bl fmodf
; CHECK-GI-NEXT: ldp q3, q2, [sp, #16] // 32-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/llvm.exp10.ll b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
index e2cd3835e44996b..783ef4c769606a2 100644
--- a/llvm/test/CodeGen/AArch64/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
@@ -354,7 +354,8 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) {
; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill
; SDAG-NEXT: mov s0, v0.s[1]
; SDAG-NEXT: bl exp10f
-; SDAG-NEXT: str d0, [sp, #16] // 16-byte Folded Spill
+; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0
+; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; SDAG-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; SDAG-NEXT: // kill: def $s0 killed $s0 killed $q0
; SDAG-NEXT: bl exp10f
@@ -378,7 +379,8 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) {
; GISEL-NEXT: mov s8, v0.s[1]
; GISEL-NEXT: // kill: def $s0 killed $s0 killed $q0
; GISEL-NEXT: bl exp10f
-; GISEL-NEXT: str d0, [sp] // 16-byte Folded Spill
+; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill
; GISEL-NEXT: fmov s0, s8
; GISEL-NEXT: bl exp10f
; GISEL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
@@ -403,7 +405,8 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; SDAG-NEXT: mov s0, v0.s[1]
; SDAG-NEXT: bl exp10f
-; SDAG-NEXT: str d0, [sp] // 16-byte Folded Spill
+; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0
+; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill
; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; SDAG-NEXT: // kill: def $s0 killed $s0 killed $q0
; SDAG-NEXT: bl exp10f
@@ -435,10 +438,12 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
; GISEL-NEXT: mov s9, v0.s[2]
; GISEL-NEXT: // kill: def $s0 killed $s0 killed $q0
; GISEL-NEXT: bl exp10f
-; GISEL-NEXT: str d0, [sp, #16] // 16-byte Folded Spill
+; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; GISEL-NEXT: fmov s0, s8
; GISEL-NEXT: bl exp10f
-; GISEL-NEXT: str d0, [sp] // 16-byte Folded Spill
+; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill
; GISEL-NEXT: fmov s0, s9
; GISEL-NEXT: bl exp10f
; GISEL-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload
@@ -465,7 +470,8 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; SDAG-NEXT: mov s0, v0.s[1]
; SDAG-NEXT: bl exp10f
-; SDAG-NEXT: str d0, [sp] // 16-byte Folded Spill
+; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0
+; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill
; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; SDAG-NEXT: // kill: def $s0 killed $s0 killed $q0
; SDAG-NEXT: bl exp10f
@@ -507,13 +513,16 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
; GISEL-NEXT: mov s10, v0.s[3]
; GISEL-NEXT: // kill: def $s0 killed $s0 killed $q0
; GISEL-NEXT: bl exp10f
-; GISEL-NEXT: str d0, [sp, #32] // 16-byte Folded Spill
+; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; GISEL-NEXT: fmov s0, s8
; GISEL-NEXT: bl exp10f
-; GISEL-NEXT: str d0, [sp, #16] // 16-byte Folded Spill
+; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; GISEL-NEXT: fmov s0, s9
; GISEL-NEXT: bl exp10f
-; GISEL-NEXT: str d0, [sp] // 16-byte Folded Spill
+; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill
; GISEL-NEXT: fmov s0, s10
; GISEL-NEXT: bl exp10f
; GISEL-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
@@ -565,6 +574,7 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) {
; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill
; SDAG-NEXT: mov d0, v0.d[1]
; SDAG-NEXT: bl exp10
+; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; SDAG-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -587,6 +597,7 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) {
; GISEL-NEXT: mov d8, v0.d[1]
; GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; GISEL-NEXT: bl exp10
+; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill
; GISEL-NEXT: fmov d0, d8
; GISEL-NEXT: bl exp10
@@ -671,6 +682,7 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
; SDAG-NEXT: mov d0, v0.d[1]
; SDAG-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
; SDAG-NEXT: bl exp10
+; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; SDAG-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -682,6 +694,7 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
; SDAG-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; SDAG-NEXT: mov d0, v0.d[1]
; SDAG-NEXT: bl exp10
+; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill
; SDAG-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -707,13 +720,16 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
; GISEL-NEXT: mov d9, v1.d[1]
; GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; GISEL-NEXT: bl exp10
+; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; GISEL-NEXT: fmov d0, d8
; GISEL-NEXT: bl exp10
+; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; GISEL-NEXT: bl exp10
+; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill
; GISEL-NEXT: fmov d0, d9
; GISEL-NEXT: bl exp10
diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll
index 623429c1085ac32..6adcd7ad70088b0 100644
--- a/llvm/test/CodeGen/AArch64/pow.ll
+++ b/llvm/test/CodeGen/AArch64/pow.ll
@@ -75,7 +75,8 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl powf
; CHECK-NEXT: fmov s1, #0.25000000
-; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl powf
@@ -116,6 +117,7 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl pow
; CHECK-NEXT: fmov d1, #0.25000000
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir
index 02d409d04da71d0..edb179e91ad3ad2 100644
--- a/llvm/test/CodeGen/AArch64/spill-fold.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fold.mir
@@ -3,6 +3,7 @@
define i64 @test_subreg_spill_fold() { ret i64 0 }
define i64 @test_subreg_spill_fold2() { ret i64 0 }
define i64 @test_subreg_spill_fold3() { ret i64 0 }
+ define <4 x float> @test_subreg_spill_fold4() { ret <4 x float> undef }
define i64 @test_subreg_fill_fold() { ret i64 0 }
define double @test_subreg_fill_fold2() { ret double 0.0 }
define i64 @test_nzcv_spill_fold() { ret i64 0 }
@@ -50,6 +51,21 @@ body: |
RET_ReallyLR implicit $x0
...
---
+# CHECK-LABEL: name: test_subreg_spill_fold4
+# A spilled write to a 128-bit register needs a 128-bit store
+name: test_subreg_spill_fold4
+registers:
+ - { id: 0, class: fpr128 }
+body: |
+ bb.0:
+ ; CHECK: undef %1.ssub:fpr128 = COPY $wzr
+ ; CHECK-NEXT: STRQui %1, %stack.0, 0 :: (store (s128) into %stack.0)
+ undef %0.ssub:fpr128 = COPY $wzr
+ INLINEASM &nop, 1, 12, implicit-def dead $d0, 12, implicit-def dead $d1, 12, implicit-def dead $d2, 12, implicit-def dead $d3, 12, implicit-def dead $d4, 12, implicit-def dead $d5, 12, implicit-def dead $d6, 12, implicit-def dead $d7, 12, implicit-def dead $d8, 12, implicit-def dead $d9, 12, implicit-def dead $d10, 12, implicit-def dead $d11, 12, implicit-def dead $d12, 12, implicit-def dead $d13, 12, implicit-def dead $d14, 12, implicit-def dead $d15, 12, implicit-def dead $d16, 12, implicit-def dead $d17, 12, implicit-def dead $d18, 12, implicit-def dead $d19, 12, implicit-def dead $d20, 12, implicit-def dead $d21, 12, implicit-def dead $d22, 12, implicit-def dead $d23, 12, implicit-def dead $d24, 12, implicit-def dead $d25, 12, implicit-def dead $d26, 12, implicit-def dead $d27, 12, implicit-def dead $d28, 12, implicit-def dead $d29, 12, implicit-def dead $d30, 12, implicit-def $d31
+ $q0 = COPY %0
+ RET_ReallyLR implicit $q0
+...
+---
# CHECK-LABEL: name: test_subreg_fill_fold
# Ensure that the filled COPY is eliminated and folded into the fill load.
name: test_subreg_fill_fold
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
index 9b0d86556f0b72b..7ac1b58c626d191 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -75,6 +75,7 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [x0, #48]
; CHECK-NEXT: bl __trunctfdf2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: bl __trunctfdf2
@@ -85,6 +86,7 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: bl __trunctfdf2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: bl __trunctfdf2
@@ -98,6 +100,7 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: bl __trunctfdf2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: bl __trunctfdf2
@@ -108,6 +111,7 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: bl __trunctfdf2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT: bl __trunctfdf2
diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
index e1b4967ed0fb93d..aa2cca40a92c2cf 100644
--- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -56,7 +56,8 @@ define <2 x float> @sin_v2f32(<2 x float> %x) nounwind {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl sinf
-; CHECK-NEXT: str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl sinf
@@ -79,7 +80,8 @@ define <3 x float> @sin_v3f32(<3 x float> %x) nounwind {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl sinf
-; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl sinf
@@ -109,7 +111,8 @@ define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl sinf
-; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl sinf
@@ -271,7 +274,8 @@ define <3 x float> @cos_v3f32(<3 x float> %x) nounwind {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl cosf
-; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl cosf
@@ -301,7 +305,8 @@ define <3 x float> @exp_v3f32(<3 x float> %x) nounwind {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl expf
-; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl expf
@@ -331,7 +336,8 @@ define <3 x float> @exp2_v3f32(<3 x float> %x) nounwind {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl exp2f
-; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl exp2f
@@ -370,7 +376,8 @@ define <3 x float> @log_v3f32(<3 x float> %x) nounwind {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl logf
-; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl logf
@@ -400,7 +407,8 @@ define <3 x float> @log10_v3f32(<3 x float> %x) nounwind {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl log10f
-; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl log10f
@@ -430,7 +438,8 @@ define <3 x float> @log2_v3f32(<3 x float> %x) nounwind {
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl log2f
-; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl log2f
More information about the llvm-commits mailing list