[llvm] a574ef6 - [AArch64] Fix incorrect big-endian spill in foldMemoryOperandImpl (#65601)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 12 08:10:33 PDT 2023


Author: john-brawn-arm
Date: 2023-10-12T16:10:28+01:00
New Revision: a574ef61766d49db4350b6f06a108f36bccb25bb

URL: https://github.com/llvm/llvm-project/commit/a574ef61766d49db4350b6f06a108f36bccb25bb
DIFF: https://github.com/llvm/llvm-project/commit/a574ef61766d49db4350b6f06a108f36bccb25bb.diff

LOG: [AArch64] Fix incorrect big-endian spill in foldMemoryOperandImpl (#65601)

When an sreg sub-register of a q register was spilled,
AArch64InstrInfo::foldMemoryOperandImpl would emit a spill of a d
register, which gives the wrong result when the target is big-endian as
the following q register fill will put the value in the top half.

Fix this by greatly simplifying the existing code for widening the spill
to only handle wzr to xzr widening, as the default result we get if the
function returns nullptr is already that a widened spill will be
emitted.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
    llvm/test/CodeGen/AArch64/fexplog.ll
    llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
    llvm/test/CodeGen/AArch64/fpow.ll
    llvm/test/CodeGen/AArch64/frem.ll
    llvm/test/CodeGen/AArch64/fsincos.ll
    llvm/test/CodeGen/AArch64/llvm.exp10.ll
    llvm/test/CodeGen/AArch64/pow.ll
    llvm/test/CodeGen/AArch64/spill-fold.mir
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
    llvm/test/CodeGen/AArch64/vec-libcalls.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c804312da5369ef..e03a94de007c9f5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5507,42 +5507,14 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
     //
     //   STRXui %xzr, %stack.0
     //
-    if (IsSpill && DstMO.isUndef() && SrcReg.isPhysical()) {
+    if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
+        TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
       assert(SrcMO.getSubReg() == 0 &&
              "Unexpected subreg on physical register");
-      const TargetRegisterClass *SpillRC;
-      unsigned SpillSubreg;
-      switch (DstMO.getSubReg()) {
-      default:
-        SpillRC = nullptr;
-        break;
-      case AArch64::sub_32:
-      case AArch64::ssub:
-        if (AArch64::GPR32RegClass.contains(SrcReg)) {
-          SpillRC = &AArch64::GPR64RegClass;
-          SpillSubreg = AArch64::sub_32;
-        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
-          SpillRC = &AArch64::FPR64RegClass;
-          SpillSubreg = AArch64::ssub;
-        } else
-          SpillRC = nullptr;
-        break;
-      case AArch64::dsub:
-        if (AArch64::FPR64RegClass.contains(SrcReg)) {
-          SpillRC = &AArch64::FPR128RegClass;
-          SpillSubreg = AArch64::dsub;
-        } else
-          SpillRC = nullptr;
-        break;
-      }
-
-      if (SpillRC)
-        if (unsigned WidenedSrcReg =
-                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
-          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
-                              FrameIndex, SpillRC, &TRI, Register());
-          return &*--InsertPt;
-        }
+      storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
+                          FrameIndex, &AArch64::GPR64RegClass, &TRI,
+                          Register());
+      return &*--InsertPt;
     }
 
     // Handle cases like filling use of:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
index 7039cccdf9393c9..ecf3f69825c0e9e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
@@ -1486,7 +1486,8 @@ define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    mov s1, v1.s[1]
 ; CHECK-NEXT:    bl fmodf
-; CHECK-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -1513,7 +1514,8 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    mov s1, v1.s[1]
 ; CHECK-NEXT:    bl fmodf
-; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -1569,6 +1571,7 @@ define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
 ; CHECK-NEXT:    mov d0, v0.d[1]
 ; CHECK-NEXT:    mov d1, v1.d[1]
 ; CHECK-NEXT:    bl fmod
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0

diff  --git a/llvm/test/CodeGen/AArch64/fexplog.ll b/llvm/test/CodeGen/AArch64/fexplog.ll
index c2d6a1f5de1918d..be6040faa2836bf 100644
--- a/llvm/test/CodeGen/AArch64/fexplog.ll
+++ b/llvm/test/CodeGen/AArch64/fexplog.ll
@@ -64,6 +64,7 @@ define <2 x double> @exp_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -86,6 +87,7 @@ define <2 x double> @exp_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp
@@ -172,6 +174,7 @@ define <4 x double> @exp_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -183,6 +186,7 @@ define <4 x double> @exp_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -208,13 +212,16 @@ define <4 x double> @exp_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl exp
@@ -244,7 +251,8 @@ define <2 x float> @exp_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
-; CHECK-SD-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
@@ -268,7 +276,8 @@ define <2 x float> @exp_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
@@ -294,7 +303,8 @@ define <3 x float> @exp_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
@@ -326,10 +336,12 @@ define <3 x float> @exp_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
@@ -357,7 +369,8 @@ define <4 x float> @exp_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
@@ -399,13 +412,16 @@ define <4 x float> @exp_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
@@ -435,7 +451,8 @@ define <8 x float> @exp_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
-; CHECK-SD-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
@@ -460,7 +477,8 @@ define <8 x float> @exp_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl expf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
@@ -510,26 +528,33 @@ define <8 x float> @exp_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
@@ -1320,6 +1345,7 @@ define <2 x double> @exp2_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -1342,6 +1368,7 @@ define <2 x double> @exp2_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp2
@@ -1428,6 +1455,7 @@ define <4 x double> @exp2_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -1439,6 +1467,7 @@ define <4 x double> @exp2_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -1464,13 +1493,16 @@ define <4 x double> @exp2_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl exp2
@@ -1500,7 +1532,8 @@ define <2 x float> @exp2_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
-; CHECK-SD-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
@@ -1524,7 +1557,8 @@ define <2 x float> @exp2_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
@@ -1550,7 +1584,8 @@ define <3 x float> @exp2_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
@@ -1582,10 +1617,12 @@ define <3 x float> @exp2_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
@@ -1613,7 +1650,8 @@ define <4 x float> @exp2_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
@@ -1655,13 +1693,16 @@ define <4 x float> @exp2_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
@@ -1691,7 +1732,8 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
-; CHECK-SD-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
@@ -1716,7 +1758,8 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp2f
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
@@ -1766,26 +1809,33 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
@@ -2576,6 +2626,7 @@ define <2 x double> @log_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -2598,6 +2649,7 @@ define <2 x double> @log_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log
@@ -2684,6 +2736,7 @@ define <4 x double> @log_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -2695,6 +2748,7 @@ define <4 x double> @log_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -2720,13 +2774,16 @@ define <4 x double> @log_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl log
@@ -2756,7 +2813,8 @@ define <2 x float> @log_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
-; CHECK-SD-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
@@ -2780,7 +2838,8 @@ define <2 x float> @log_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
@@ -2806,7 +2865,8 @@ define <3 x float> @log_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
@@ -2838,10 +2898,12 @@ define <3 x float> @log_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
@@ -2869,7 +2931,8 @@ define <4 x float> @log_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
@@ -2911,13 +2974,16 @@ define <4 x float> @log_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
@@ -2947,7 +3013,8 @@ define <8 x float> @log_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
-; CHECK-SD-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
@@ -2972,7 +3039,8 @@ define <8 x float> @log_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl logf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
@@ -3022,26 +3090,33 @@ define <8 x float> @log_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
@@ -3832,6 +3907,7 @@ define <2 x double> @log2_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -3854,6 +3930,7 @@ define <2 x double> @log2_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log2
@@ -3940,6 +4017,7 @@ define <4 x double> @log2_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -3951,6 +4029,7 @@ define <4 x double> @log2_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -3976,13 +4055,16 @@ define <4 x double> @log2_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl log2
@@ -4012,7 +4094,8 @@ define <2 x float> @log2_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
-; CHECK-SD-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
@@ -4036,7 +4119,8 @@ define <2 x float> @log2_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
@@ -4062,7 +4146,8 @@ define <3 x float> @log2_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
@@ -4094,10 +4179,12 @@ define <3 x float> @log2_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
@@ -4125,7 +4212,8 @@ define <4 x float> @log2_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
@@ -4167,13 +4255,16 @@ define <4 x float> @log2_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
@@ -4203,7 +4294,8 @@ define <8 x float> @log2_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
-; CHECK-SD-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
@@ -4228,7 +4320,8 @@ define <8 x float> @log2_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log2f
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
@@ -4278,26 +4371,33 @@ define <8 x float> @log2_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
@@ -5088,6 +5188,7 @@ define <2 x double> @log10_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log10
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -5110,6 +5211,7 @@ define <2 x double> @log10_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log10
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log10
@@ -5196,6 +5298,7 @@ define <4 x double> @log10_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log10
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -5207,6 +5310,7 @@ define <4 x double> @log10_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log10
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -5232,13 +5336,16 @@ define <4 x double> @log10_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log10
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log10
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log10
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl log10
@@ -5268,7 +5375,8 @@ define <2 x float> @log10_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
-; CHECK-SD-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
@@ -5292,7 +5400,8 @@ define <2 x float> @log10_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
@@ -5318,7 +5427,8 @@ define <3 x float> @log10_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
@@ -5350,10 +5460,12 @@ define <3 x float> @log10_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
@@ -5381,7 +5493,8 @@ define <4 x float> @log10_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
@@ -5423,13 +5536,16 @@ define <4 x float> @log10_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
@@ -5459,7 +5575,8 @@ define <8 x float> @log10_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
-; CHECK-SD-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
@@ -5484,7 +5601,8 @@ define <8 x float> @log10_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log10f
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
@@ -5534,26 +5652,33 @@ define <8 x float> @log10_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
index c30c53835cb44fb..8f3100c82772ba2 100644
--- a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -52,7 +52,8 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
 ; CHECK-NEON-NEXT:    fmov s0, #9.00000000
 ; CHECK-NEON-NEXT:    bl ldexpf
 ; CHECK-NEON-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEON-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEON-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEON-NEXT:    fmov s0, #9.00000000
 ; CHECK-NEON-NEXT:    fmov w0, s1
 ; CHECK-NEON-NEXT:    bl ldexpf

diff  --git a/llvm/test/CodeGen/AArch64/fpow.ll b/llvm/test/CodeGen/AArch64/fpow.ll
index 79c99c48ce3dc1d..1b3ae7a227924a3 100644
--- a/llvm/test/CodeGen/AArch64/fpow.ll
+++ b/llvm/test/CodeGen/AArch64/fpow.ll
@@ -66,6 +66,7 @@ define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -92,9 +93,10 @@ define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    fmov d1, d9
+; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl pow
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -198,6 +200,7 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #48] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d1, v2.d[1]
 ; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -211,6 +214,7 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -246,19 +250,22 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    mov d9, v4.d[1]
 ; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    fmov d1, d10
+; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    fmov d1, d11
+; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl pow
 ; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
@@ -290,7 +297,8 @@ define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
-; CHECK-SD-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -319,9 +327,10 @@ define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s9
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
@@ -347,7 +356,8 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -387,13 +397,15 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-GI-NEXT:    mov s11, v1.s[2]
 ; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s10
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
@@ -422,7 +434,8 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -475,17 +488,20 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s12
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s13
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
@@ -517,7 +533,8 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #32] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s1, v2.s[1]
 ; CHECK-SD-NEXT:    bl powf
-; CHECK-SD-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -546,7 +563,8 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
-; CHECK-SD-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -614,32 +632,39 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-GI-NEXT:    mov s2, v3.s[3]
 ; CHECK-GI-NEXT:    stp s2, s5, [sp, #200] // 8-byte Folded Spill
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s14
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s15
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s13
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    fmov s1, s12
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #48] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    fmov s1, s11
-; CHECK-GI-NEXT:    str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #112] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    str d0, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp s1, s0, [sp, #200] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldp q3, q2, [sp, #16] // 32-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/frem.ll b/llvm/test/CodeGen/AArch64/frem.ll
index 16a6ba3f8cc93f7..e8077b94f2bf688 100644
--- a/llvm/test/CodeGen/AArch64/frem.ll
+++ b/llvm/test/CodeGen/AArch64/frem.ll
@@ -68,6 +68,7 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl fmod
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -94,9 +95,10 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    bl fmod
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    fmov d1, d9
+; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl fmod
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -200,6 +202,7 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #48] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d1, v2.d[1]
 ; CHECK-SD-NEXT:    bl fmod
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -213,6 +216,7 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl fmod
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -248,19 +252,22 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    mov d9, v4.d[1]
 ; CHECK-GI-NEXT:    bl fmod
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    fmov d1, d10
+; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl fmod
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl fmod
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    fmov d1, d11
+; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl fmod
 ; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
@@ -292,7 +299,8 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
-; CHECK-SD-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -321,9 +329,10 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s9
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
@@ -349,7 +358,8 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -389,13 +399,15 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-GI-NEXT:    mov s11, v1.s[2]
 ; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s10
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
@@ -424,7 +436,8 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -477,17 +490,20 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s12
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s13
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
@@ -519,7 +535,8 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #32] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s1, v2.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
-; CHECK-SD-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -548,7 +565,8 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
-; CHECK-SD-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
@@ -616,32 +634,39 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-GI-NEXT:    mov s2, v3.s[3]
 ; CHECK-GI-NEXT:    stp s2, s5, [sp, #200] // 8-byte Folded Spill
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s14
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s15
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s1, s13
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    fmov s1, s12
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #48] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    fmov s1, s11
-; CHECK-GI-NEXT:    str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #112] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    str d0, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp s1, s0, [sp, #200] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q3, q2, [sp, #16] // 32-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/fsincos.ll b/llvm/test/CodeGen/AArch64/fsincos.ll
index 361a1e996fd7acc..ec60fedeb47296b 100644
--- a/llvm/test/CodeGen/AArch64/fsincos.ll
+++ b/llvm/test/CodeGen/AArch64/fsincos.ll
@@ -64,6 +64,7 @@ define <2 x double> @sin_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl sin
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -86,6 +87,7 @@ define <2 x double> @sin_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl sin
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl sin
@@ -172,6 +174,7 @@ define <4 x double> @sin_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl sin
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -183,6 +186,7 @@ define <4 x double> @sin_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl sin
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -208,13 +212,16 @@ define <4 x double> @sin_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl sin
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl sin
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl sin
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl sin
@@ -244,7 +251,8 @@ define <2 x float> @sin_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
-; CHECK-SD-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
@@ -268,7 +276,8 @@ define <2 x float> @sin_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
@@ -294,7 +303,8 @@ define <3 x float> @sin_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
@@ -326,10 +336,12 @@ define <3 x float> @sin_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
@@ -357,7 +369,8 @@ define <4 x float> @sin_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
@@ -399,13 +412,16 @@ define <4 x float> @sin_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
@@ -435,7 +451,8 @@ define <8 x float> @sin_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
-; CHECK-SD-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
@@ -460,7 +477,8 @@ define <8 x float> @sin_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl sinf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
@@ -510,26 +528,33 @@ define <8 x float> @sin_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
@@ -1320,6 +1345,7 @@ define <2 x double> @cos_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl cos
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -1342,6 +1368,7 @@ define <2 x double> @cos_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl cos
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl cos
@@ -1428,6 +1455,7 @@ define <4 x double> @cos_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl cos
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -1439,6 +1467,7 @@ define <4 x double> @cos_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl cos
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -1464,13 +1493,16 @@ define <4 x double> @cos_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl cos
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl cos
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl cos
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl cos
@@ -1500,7 +1532,8 @@ define <2 x float> @cos_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
-; CHECK-SD-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
@@ -1524,7 +1557,8 @@ define <2 x float> @cos_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
@@ -1550,7 +1584,8 @@ define <3 x float> @cos_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
@@ -1582,10 +1617,12 @@ define <3 x float> @cos_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
@@ -1613,7 +1650,8 @@ define <4 x float> @cos_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
@@ -1655,13 +1693,16 @@ define <4 x float> @cos_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
@@ -1691,7 +1732,8 @@ define <8 x float> @cos_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
-; CHECK-SD-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
@@ -1716,7 +1758,8 @@ define <8 x float> @cos_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl cosf
-; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
@@ -1766,26 +1809,33 @@ define <8 x float> @cos_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/llvm.exp10.ll b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
index ed0d0d519483585..c3f14b0dcff71ee 100644
--- a/llvm/test/CodeGen/AArch64/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
@@ -353,7 +353,8 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) {
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov s0, v0.s[1]
 ; SDAG-NEXT:    bl exp10f
-; SDAG-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
+; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; SDAG-NEXT:    bl exp10f
@@ -377,7 +378,8 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) {
 ; GISEL-NEXT:    mov s8, v0.s[1]
 ; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s8
 ; GISEL-NEXT:    bl exp10f
 ; GISEL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
@@ -402,7 +404,8 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov s0, v0.s[1]
 ; SDAG-NEXT:    bl exp10f
-; SDAG-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
+; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; SDAG-NEXT:    bl exp10f
@@ -434,10 +437,12 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
 ; GISEL-NEXT:    mov s9, v0.s[2]
 ; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s8
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s9
 ; GISEL-NEXT:    bl exp10f
 ; GISEL-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
@@ -464,7 +469,8 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov s0, v0.s[1]
 ; SDAG-NEXT:    bl exp10f
-; SDAG-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
+; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; SDAG-NEXT:    bl exp10f
@@ -506,13 +512,16 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
 ; GISEL-NEXT:    mov s10, v0.s[3]
 ; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s8
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s9
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s10
 ; GISEL-NEXT:    bl exp10f
 ; GISEL-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
@@ -564,6 +573,7 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) {
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov d0, v0.d[1]
 ; SDAG-NEXT:    bl exp10
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -586,6 +596,7 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) {
 ; GISEL-NEXT:    mov d8, v0.d[1]
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    bl exp10
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov d0, d8
 ; GISEL-NEXT:    bl exp10
@@ -670,6 +681,7 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
 ; SDAG-NEXT:    mov d0, v0.d[1]
 ; SDAG-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; SDAG-NEXT:    bl exp10
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -681,6 +693,7 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
 ; SDAG-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; SDAG-NEXT:    mov d0, v0.d[1]
 ; SDAG-NEXT:    bl exp10
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -706,13 +719,16 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
 ; GISEL-NEXT:    mov d9, v1.d[1]
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    bl exp10
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov d0, d8
 ; GISEL-NEXT:    bl exp10
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    bl exp10
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov d0, d9
 ; GISEL-NEXT:    bl exp10

diff  --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll
index 5141b21c7976a90..495541e9fd949da 100644
--- a/llvm/test/CodeGen/AArch64/pow.ll
+++ b/llvm/test/CodeGen/AArch64/pow.ll
@@ -75,7 +75,8 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    fmov s1, #0.25000000
-; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl powf
@@ -116,6 +117,7 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl pow
 ; CHECK-NEXT:    fmov d1, #0.25000000
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0

diff  --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir
index 02d409d04da71d0..b1e7ebe3a7e82b6 100644
--- a/llvm/test/CodeGen/AArch64/spill-fold.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fold.mir
@@ -1,10 +1,13 @@
 # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs  -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs  -o - %s | FileCheck %s
 --- |
   define i64 @test_subreg_spill_fold() { ret i64 0 }
   define i64 @test_subreg_spill_fold2() { ret i64 0 }
   define i64 @test_subreg_spill_fold3() { ret i64 0 }
+  define <4 x float> @test_subreg_spill_fold4() { ret <4 x float> undef }
   define i64 @test_subreg_fill_fold() { ret i64 0 }
   define double @test_subreg_fill_fold2() { ret double 0.0 }
+  define <4 x float> @test_subreg_fill_fold3() { ret <4 x float> undef }
   define i64 @test_nzcv_spill_fold() { ret i64 0 }
 ...
 ---
@@ -50,6 +53,21 @@ body:             |
     RET_ReallyLR implicit $x0
 ...
 ---
+# CHECK-LABEL: name: test_subreg_spill_fold4
+# A spilled write to a 128-bit register needs a 128-bit store
+name:            test_subreg_spill_fold4
+registers:
+  - { id: 0, class: fpr128 }
+body:             |
+  bb.0:
+    ; CHECK:      undef %1.ssub:fpr128 = COPY $wzr
+    ; CHECK-NEXT: STRQui %1, %stack.0, 0 :: (store (s128) into %stack.0)
+    undef %0.ssub:fpr128 = COPY $wzr
+    INLINEASM &nop, 1, 12, implicit-def dead $d0, 12, implicit-def dead $d1, 12, implicit-def dead $d2, 12, implicit-def dead $d3, 12, implicit-def dead $d4, 12, implicit-def dead $d5, 12, implicit-def dead $d6, 12, implicit-def dead $d7, 12, implicit-def dead $d8, 12, implicit-def dead $d9, 12, implicit-def dead $d10, 12, implicit-def dead $d11, 12, implicit-def dead $d12, 12, implicit-def dead $d13, 12, implicit-def dead $d14, 12, implicit-def dead $d15, 12, implicit-def dead $d16, 12, implicit-def dead $d17, 12, implicit-def dead $d18, 12, implicit-def dead $d19, 12, implicit-def dead $d20, 12, implicit-def dead $d21, 12, implicit-def dead $d22, 12, implicit-def dead $d23, 12, implicit-def dead $d24, 12, implicit-def dead $d25, 12, implicit-def dead $d26, 12, implicit-def dead $d27, 12, implicit-def dead $d28, 12, implicit-def dead $d29, 12, implicit-def dead $d30, 12, implicit-def $d31
+    $q0 = COPY %0
+    RET_ReallyLR implicit $q0
+...
+---
 # CHECK-LABEL: name: test_subreg_fill_fold
 # Ensure that the filled COPY is eliminated and folded into the fill load.
 name:            test_subreg_fill_fold
@@ -82,6 +100,25 @@ body:             |
     RET_ReallyLR implicit $d0
 ...
 ---
+# CHECK-LABEL: name: test_subreg_fill_fold3
+# A filled copy from a sub-register should load the whole register
+name:              test_subreg_fill_fold3
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr128 }
+liveins:
+  - { reg: '$q0' }
+body:             |
+  bb.0:
+    liveins: $q0
+    %0 = COPY $q0
+    INLINEASM &nop, 1, 12, implicit-def dead $d0, 12, implicit-def dead $d1, 12, implicit-def dead $d2, 12, implicit-def dead $d3, 12, implicit-def dead $d4, 12, implicit-def dead $d5, 12, implicit-def dead $d6, 12, implicit-def dead $d7, 12, implicit-def dead $d8, 12, implicit-def dead $d9, 12, implicit-def dead $d10, 12, implicit-def dead $d11, 12, implicit-def dead $d12, 12, implicit-def dead $d13, 12, implicit-def dead $d14, 12, implicit-def dead $d15, 12, implicit-def dead $d16, 12, implicit-def dead $d17, 12, implicit-def dead $d18, 12, implicit-def dead $d19, 12, implicit-def dead $d20, 12, implicit-def dead $d21, 12, implicit-def dead $d22, 12, implicit-def dead $d23, 12, implicit-def dead $d24, 12, implicit-def dead $d25, 12, implicit-def dead $d26, 12, implicit-def dead $d27, 12, implicit-def dead $d28, 12, implicit-def dead $d29, 12, implicit-def dead $d30, 12, implicit-def $d31
+    ; CHECK:      %1:fpr128 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+    ; CHECK-NEXT: $s0 = COPY %1.ssub
+    $s0 = COPY %0.ssub
+    RET_ReallyLR implicit $s0
+...
+---
 # CHECK-LABEL: name: test_nzcv_spill_fold
 # Ensure that nzcv COPY cannot be folded.
 name:            test_nzcv_spill_fold

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
index 9b0d86556f0b72b..7ac1b58c626d191 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -75,6 +75,7 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [x0, #48]
 ; CHECK-NEXT:    bl __trunctfdf2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
@@ -85,6 +86,7 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
@@ -98,6 +100,7 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
@@ -108,6 +111,7 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2

diff  --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
index e1b4967ed0fb93d..aa2cca40a92c2cf 100644
--- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -56,7 +56,8 @@ define <2 x float> @sin_v2f32(<2 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
-; CHECK-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
@@ -79,7 +80,8 @@ define <3 x float> @sin_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
-; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
@@ -109,7 +111,8 @@ define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
-; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
@@ -271,7 +274,8 @@ define <3 x float> @cos_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl cosf
-; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl cosf
@@ -301,7 +305,8 @@ define <3 x float> @exp_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl expf
-; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl expf
@@ -331,7 +336,8 @@ define <3 x float> @exp2_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl exp2f
-; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl exp2f
@@ -370,7 +376,8 @@ define <3 x float> @log_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl logf
-; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl logf
@@ -400,7 +407,8 @@ define <3 x float> @log10_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl log10f
-; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl log10f
@@ -430,7 +438,8 @@ define <3 x float> @log2_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl log2f
-; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl log2f


        


More information about the llvm-commits mailing list