[llvm] 4c28d21 - [AArch64] Avoid single-element vector fp converts in streaming[-compatible] functions (#112213)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 16 02:00:53 PDT 2024


Author: Benjamin Maxwell
Date: 2024-10-16T10:00:49+01:00
New Revision: 4c28d21f6af70ffee33660de35b263283dc32139

URL: https://github.com/llvm/llvm-project/commit/4c28d21f6af70ffee33660de35b263283dc32139
DIFF: https://github.com/llvm/llvm-project/commit/4c28d21f6af70ffee33660de35b263283dc32139.diff

LOG: [AArch64] Avoid single-element vector fp converts in streaming[-compatible] functions (#112213)

The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF
are only supported in streaming[-compatible] functions with `+sme2p2`.

Reference:
-
https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-convert-to-signed-integer--rounding-toward-zero--vector--
-
https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-convert-to-floating-point--vector--
-
https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-convert-to-floating-point--vector--

Codegen will be improved in follow up patches.

Added: 
    llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 325508b62a9f14..32f2c7c71d1757 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6237,7 +6237,8 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
 // Some float -> int -> float conversion patterns for which we want to keep the
 // int values in FP registers using the corresponding NEON instructions to
 // avoid more costly int <-> fp register transfers.
-let Predicates = [HasNEONandIsStreamingSafe] in {
+// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
+let Predicates = [HasNEON] in {
 def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
           (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
 def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -6247,7 +6248,8 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
 def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
           (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
 
-let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
+// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
+let Predicates = [HasNEON, HasFullFP16] in {
 def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
           (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
 def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
@@ -6270,9 +6272,10 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
 
 // fp16: integer extraction from vector must be at least 32-bits to be legal.
 // Actual extraction result is then an in-reg sign-extension of lower 16-bits.
-let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
-def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract 
-                (v8i16 FPR128:$Rn), (i64 0))), i16)))), 
+// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
+let Predicates = [HasNEON, HasFullFP16] in {
+def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
+                (v8i16 FPR128:$Rn), (i64 0))), i16)))),
           (SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
 
 // unsigned 32-bit extracted element is truncated to 16-bits using AND
@@ -6367,7 +6370,7 @@ def : Pat <(f64 (uint_to_fp (i32
                           (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
 // 64-bits -> double are handled in target specific dag combine:
 // performIntToFpCombine.
-} // let Predicates = [HasNEONandIsStreamingSafe]
+} // let Predicates = [HasNEON]
 
 //===----------------------------------------------------------------------===//
 // Advanced SIMD three 
diff erent-sized vector instructions.

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
new file mode 100644
index 00000000000000..9aadf3133ba197
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define double @t1(double %x) {
+; CHECK-LABEL: t1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs x8, d0
+; CHECK-NEXT:    scvtf d0, x8
+; CHECK-NEXT:    ret
+;
+; NON-STREAMING-LABEL: t1:
+; NON-STREAMING:       // %bb.0: // %entry
+; NON-STREAMING-NEXT:    fcvtzs d0, d0
+; NON-STREAMING-NEXT:    scvtf d0, d0
+; NON-STREAMING-NEXT:    ret
+entry:
+  %conv = fptosi double %x to i64
+  %conv1 = sitofp i64 %conv to double
+  ret double %conv1
+}
+
+define float @t2(float %x) {
+; CHECK-LABEL: t2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs w8, s0
+; CHECK-NEXT:    scvtf s0, w8
+; CHECK-NEXT:    ret
+;
+; NON-STREAMING-LABEL: t2:
+; NON-STREAMING:       // %bb.0: // %entry
+; NON-STREAMING-NEXT:    fcvtzs s0, s0
+; NON-STREAMING-NEXT:    scvtf s0, s0
+; NON-STREAMING-NEXT:    ret
+entry:
+  %conv = fptosi float %x to i32
+  %conv1 = sitofp i32 %conv to float
+  ret float %conv1
+}
+
+define half @t3(half %x)  {
+; CHECK-LABEL: t3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvtzs w8, s0
+; CHECK-NEXT:    scvtf s0, w8
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ret
+;
+; NON-STREAMING-LABEL: t3:
+; NON-STREAMING:       // %bb.0: // %entry
+; NON-STREAMING-NEXT:    fcvt s0, h0
+; NON-STREAMING-NEXT:    fcvtzs s0, s0
+; NON-STREAMING-NEXT:    scvtf s0, s0
+; NON-STREAMING-NEXT:    fcvt h0, s0
+; NON-STREAMING-NEXT:    ret
+entry:
+  %conv = fptosi half %x to i32
+  %conv1 = sitofp i32 %conv to half
+  ret half %conv1
+}
+
+define double @t4(double %x) {
+; CHECK-LABEL: t4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu x8, d0
+; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    ret
+;
+; NON-STREAMING-LABEL: t4:
+; NON-STREAMING:       // %bb.0: // %entry
+; NON-STREAMING-NEXT:    fcvtzu d0, d0
+; NON-STREAMING-NEXT:    ucvtf d0, d0
+; NON-STREAMING-NEXT:    ret
+entry:
+  %conv = fptoui double %x to i64
+  %conv1 = uitofp i64 %conv to double
+  ret double %conv1
+}
+
+define float @t5(float %x) {
+; CHECK-LABEL: t5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu w8, s0
+; CHECK-NEXT:    ucvtf s0, w8
+; CHECK-NEXT:    ret
+;
+; NON-STREAMING-LABEL: t5:
+; NON-STREAMING:       // %bb.0: // %entry
+; NON-STREAMING-NEXT:    fcvtzu s0, s0
+; NON-STREAMING-NEXT:    ucvtf s0, s0
+; NON-STREAMING-NEXT:    ret
+entry:
+  %conv = fptoui float %x to i32
+  %conv1 = uitofp i32 %conv to float
+  ret float %conv1
+}
+
+define half @t6(half %x)  {
+; CHECK-LABEL: t6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvtzu w8, s0
+; CHECK-NEXT:    ucvtf s0, w8
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ret
+;
+; NON-STREAMING-LABEL: t6:
+; NON-STREAMING:       // %bb.0: // %entry
+; NON-STREAMING-NEXT:    fcvt s0, h0
+; NON-STREAMING-NEXT:    fcvtzu s0, s0
+; NON-STREAMING-NEXT:    ucvtf s0, s0
+; NON-STREAMING-NEXT:    fcvt h0, s0
+; NON-STREAMING-NEXT:    ret
+entry:
+  %conv = fptoui half %x to i32
+  %conv1 = uitofp i32 %conv to half
+  ret half %conv1
+}

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index afd3bb7161c155..0c712a15d4de2f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -21,20 +21,20 @@ define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) {
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #6]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #4]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #14]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #2]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #10]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
@@ -58,36 +58,36 @@ define void @ucvtf_v8i16_v8f16(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    ldr q0, [x0]
 ; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #14]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #12]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #10]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #6]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #4]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #22]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #2]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
@@ -115,68 +115,68 @@ define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #62]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #60]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #58]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #54]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #52]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #50]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #14]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #12]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #10]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #6]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #4]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #38]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #2]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp]
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #34]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
@@ -207,11 +207,11 @@ define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) {
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #4]
+; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ucvtf s1, w9
+; NONEON-NOSVE-NEXT:    stp s1, s0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
@@ -234,15 +234,15 @@ define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #14]
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #12]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #10]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #32
@@ -271,25 +271,25 @@ define void @ucvtf_v8i16_v8f32(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
@@ -328,47 +328,47 @@ define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #46]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #44]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #44]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #42]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #40]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #40]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #36]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #32]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
 ; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
-; NONEON-NOSVE-NEXT:    ucvtf s1, s0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ucvtf s1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
 ; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
@@ -399,8 +399,8 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
 ; NONEON-NOSVE-NEXT:    str d0, [sp]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp], #16
 ; NONEON-NOSVE-NEXT:    ret
@@ -424,11 +424,11 @@ define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #12]
+; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #32
 ; NONEON-NOSVE-NEXT:    ret
@@ -464,15 +464,13 @@ define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
@@ -529,27 +527,23 @@ define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #88]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #92]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #88]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #80]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #84]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #80]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #72]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #76]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #72]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #68]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #64]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
@@ -649,49 +643,42 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
 ; NONEON-NOSVE-NEXT:    str d1, [sp, #328]
 ; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #104]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #164]
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #160]
 ; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #176]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #160]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    str d0, [sp, #168]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #152]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #156]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #152]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #144]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #224]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #148]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #144]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #136]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #140]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #136]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldr w8, [sp, #332]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #192]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #332]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldr w8, [sp, #328]
 ; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #192]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #328]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #184]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #188]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #184]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #176]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #180]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #168]
 ; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #288]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #176]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #172]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #168]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #256]
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #224]
 ; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #256]
@@ -1041,10 +1028,9 @@ define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #8]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #32
@@ -1073,15 +1059,13 @@ define void @ucvtf_v4i32_v4f64(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
@@ -1120,27 +1104,23 @@ define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #56]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #60]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #48]
 ; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #56]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #52]
-; NONEON-NOSVE-NEXT:    ucvtf d1, d0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #48]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
@@ -2984,8 +2964,8 @@ define half @ucvtf_i16_f16(ptr %0) {
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i16_f16:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    ldr h0, [x0]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ldrh w8, [x0]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
 ; NONEON-NOSVE-NEXT:    fcvt h0, s0
 ; NONEON-NOSVE-NEXT:    ret
   %2 = load i16, ptr %0, align 64
@@ -2996,14 +2976,14 @@ define half @ucvtf_i16_f16(ptr %0) {
 define float @ucvtf_i16_f32(ptr %0) {
 ; CHECK-LABEL: ucvtf_i16_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr h0, [x0]
-; CHECK-NEXT:    ucvtf s0, s0
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    ucvtf s0, w8
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i16_f32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    ldr h0, [x0]
-; NONEON-NOSVE-NEXT:    ucvtf s0, s0
+; NONEON-NOSVE-NEXT:    ldrh w8, [x0]
+; NONEON-NOSVE-NEXT:    ucvtf s0, w8
 ; NONEON-NOSVE-NEXT:    ret
   %2 = load i16, ptr %0, align 64
   %3 = uitofp i16 %2 to float
@@ -3013,14 +2993,14 @@ define float @ucvtf_i16_f32(ptr %0) {
 define double @ucvtf_i16_f64(ptr %0) {
 ; CHECK-LABEL: ucvtf_i16_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr h0, [x0]
-; CHECK-NEXT:    ucvtf d0, d0
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    ucvtf d0, w8
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i16_f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    ldr h0, [x0]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ldrh w8, [x0]
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
 ; NONEON-NOSVE-NEXT:    ret
   %2 = load i16, ptr %0, align 64
   %3 = uitofp i16 %2 to double
@@ -3065,14 +3045,14 @@ define float @ucvtf_i32_f32(ptr %0) {
 define double @ucvtf_i32_f64(ptr %0) {
 ; CHECK-LABEL: ucvtf_i32_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr s0, [x0]
-; CHECK-NEXT:    ucvtf d0, d0
+; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:    ucvtf d0, w8
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_i32_f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    ldr s0, [x0]
-; NONEON-NOSVE-NEXT:    ucvtf d0, d0
+; NONEON-NOSVE-NEXT:    ldr w8, [x0]
+; NONEON-NOSVE-NEXT:    ucvtf d0, w8
 ; NONEON-NOSVE-NEXT:    ret
   %2 = load i32, ptr %0, align 64
   %3 = uitofp i32 %2 to double


        


More information about the llvm-commits mailing list