[llvm] 59ed6df - [LLVM][CodeGen][SVE] Use DUPM for constantfp splats. (#168391)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 04:15:43 PST 2025


Author: Paul Walker
Date: 2025-11-18T12:15:38Z
New Revision: 59ed6dfe97b35a4dc88f69e3d830edf8caa99d10

URL: https://github.com/llvm/llvm-project/commit/59ed6dfe97b35a4dc88f69e3d830edf8caa99d10
DIFF: https://github.com/llvm/llvm-project/commit/59ed6dfe97b35a4dc88f69e3d830edf8caa99d10.diff

LOG: [LLVM][CodeGen][SVE] Use DUPM for constantfp splats. (#168391)

This helps cases where the immediate range of FDUP is not sufficient.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td
    llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
    llvm/test/CodeGen/AArch64/sve-fp-combine.ll
    llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
    llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
    llvm/test/CodeGen/AArch64/sve-llrint.ll
    llvm/test/CodeGen/AArch64/sve-lrint.ll
    llvm/test/CodeGen/AArch64/sve-vector-splat.ll
    llvm/test/CodeGen/AArch64/sve-vselect-imm.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index f1db05dda4e40..08466667c0fa5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4403,43 +4403,46 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
 
 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
                                               bool Invert) {
-  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
-    uint64_t ImmVal = CNode->getZExtValue();
-    SDLoc DL(N);
-
-    if (Invert)
-      ImmVal = ~ImmVal;
+  uint64_t ImmVal;
+  if (auto CI = dyn_cast<ConstantSDNode>(N))
+    ImmVal = CI->getZExtValue();
+  else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
+    ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+  else
+    return false;
 
-    // Shift mask depending on type size.
-    switch (VT.SimpleTy) {
-    case MVT::i8:
-      ImmVal &= 0xFF;
-      ImmVal |= ImmVal << 8;
-      ImmVal |= ImmVal << 16;
-      ImmVal |= ImmVal << 32;
-      break;
-    case MVT::i16:
-      ImmVal &= 0xFFFF;
-      ImmVal |= ImmVal << 16;
-      ImmVal |= ImmVal << 32;
-      break;
-    case MVT::i32:
-      ImmVal &= 0xFFFFFFFF;
-      ImmVal |= ImmVal << 32;
-      break;
-    case MVT::i64:
-      break;
-    default:
-      llvm_unreachable("Unexpected type");
-    }
+  if (Invert)
+    ImmVal = ~ImmVal;
 
-    uint64_t encoding;
-    if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
-      Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
-      return true;
-    }
+  // Shift mask depending on type size.
+  switch (VT.SimpleTy) {
+  case MVT::i8:
+    ImmVal &= 0xFF;
+    ImmVal |= ImmVal << 8;
+    ImmVal |= ImmVal << 16;
+    ImmVal |= ImmVal << 32;
+    break;
+  case MVT::i16:
+    ImmVal &= 0xFFFF;
+    ImmVal |= ImmVal << 16;
+    ImmVal |= ImmVal << 32;
+    break;
+  case MVT::i32:
+    ImmVal &= 0xFFFFFFFF;
+    ImmVal |= ImmVal << 32;
+    break;
+  case MVT::i64:
+    break;
+  default:
+    llvm_unreachable("Unexpected type");
   }
-  return false;
+
+  uint64_t encoding;
+  if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding))
+    return false;
+
+  Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
+  return true;
 }
 
 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c8c21c4822ffe..e99b3f8ff07e0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -989,7 +989,7 @@ let Predicates = [HasSVE_or_SME] in {
             (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;
 
   // Duplicate FP immediate into all vector elements
-  let AddedComplexity = 2 in {
+  let AddedComplexity = 3 in {
     def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)),
               (FDUP_ZI_H fpimm16:$imm8)>;
     def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)),

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 1664f4ad0c8fa..1e771e1fb9403 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -347,6 +347,11 @@ def SVELogicalImm16Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16>",
 def SVELogicalImm32Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
 def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
 
+def SVELogicalFPImm16Pat : ComplexPattern<f16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
+def SVELogicalFPImm32Pat : ComplexPattern<f32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
+def SVELogicalFPImm64Pat : ComplexPattern<f64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
+def SVELogicalBFPImmPat : ComplexPattern<bf16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
+
 def SVELogicalImm8NotPat  : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i8, true>", []>;
 def SVELogicalImm16NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16, true>", []>;
 def SVELogicalImm32NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
@@ -2160,6 +2165,26 @@ multiclass sve_int_dup_mask_imm<string asm> {
             (!cast<Instruction>(NAME) i64:$imm)>;
   def : Pat<(nxv2i64 (splat_vector (i64 (SVELogicalImm64Pat i64:$imm)))),
             (!cast<Instruction>(NAME) i64:$imm)>;
+
+  def : Pat<(nxv8f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv4f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv4f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2f64 (splat_vector (f64 (SVELogicalFPImm64Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+
+  def : Pat<(nxv8bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv4bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
 }
 
 //===----------------------------------------------------------------------===//

diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
index 16e8feb0dc5bb..fc3e018f2ec7a 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
@@ -632,7 +632,6 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    uunpkhi z3.s, z2.h
 ; SVE-NEXT:    uunpkhi z4.s, z1.h
-; SVE-NEXT:    mov w8, #32768 // =0x8000
 ; SVE-NEXT:    uunpklo z2.s, z2.h
 ; SVE-NEXT:    uunpklo z1.s, z1.h
 ; SVE-NEXT:    ptrue p1.s
@@ -643,9 +642,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
 ; SVE-NEXT:    fmul z3.s, z4.s, z3.s
 ; SVE-NEXT:    fmul z1.s, z1.s, z2.s
 ; SVE-NEXT:    bfcvt z2.h, p1/m, z3.s
-; SVE-NEXT:    fmov h3, w8
+; SVE-NEXT:    dupm z3.h, #0x8000
 ; SVE-NEXT:    bfcvt z1.h, p1/m, z1.s
-; SVE-NEXT:    mov z3.h, h3
 ; SVE-NEXT:    uzp1 z1.h, z1.h, z2.h
 ; SVE-NEXT:    sel z1.h, p0, z1.h, z3.h
 ; SVE-NEXT:    uunpkhi z3.s, z0.h
@@ -665,10 +663,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
 ;
 ; SVE-B16B16-LABEL: fsub_sel_fmul_negzero_nxv8bf16:
 ; SVE-B16B16:       // %bb.0:
-; SVE-B16B16-NEXT:    mov w8, #32768 // =0x8000
+; SVE-B16B16-NEXT:    dupm z3.h, #0x8000
 ; SVE-B16B16-NEXT:    bfmul z1.h, z1.h, z2.h
-; SVE-B16B16-NEXT:    fmov h3, w8
-; SVE-B16B16-NEXT:    mov z3.h, h3
 ; SVE-B16B16-NEXT:    sel z1.h, p0, z1.h, z3.h
 ; SVE-B16B16-NEXT:    bfsub z0.h, z0.h, z1.h
 ; SVE-B16B16-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
index 53aba04028d62..57389ad2fe9b2 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
@@ -1134,10 +1134,9 @@ define <vscale x 2 x double> @fadd_sel_fmul_d_negzero(<vscale x 2 x double> %a,
 define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: fsub_sel_fmul_h_negzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    dupm z3.h, #0x8000
 ; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
-; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
+; CHECK-NEXT:    sel z1.h, p0, z1.h, z3.h
 ; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 8 x half> %b, %c
@@ -1150,10 +1149,9 @@ define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vsc
 define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: fsub_sel_fmul_s_negzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT:    mov z3.s, #0x80000000
 ; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
-; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
+; CHECK-NEXT:    sel z1.s, p0, z1.s, z3.s
 ; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 4 x float> %b, %c
@@ -1166,10 +1164,9 @@ define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <v
 define <vscale x 2 x double> @fsub_sel_fmul_d_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: fsub_sel_fmul_d_negzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    fmul z1.d, z1.d, z2.d
-; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z3.d
 ; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 2 x double> %b, %c

diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
index 8750867c56731..1223ae1c0cbdd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
@@ -51,10 +51,9 @@ define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    dupm z2.h, #0x8000
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    str z0, [sp]
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    fmov s0, s1
 ; CHECK-NEXT:    st1h { z2.d }, p0, [sp, #3, mul vl]
 ; CHECK-NEXT:    ptrue p0.h
@@ -77,12 +76,11 @@ define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-NEXT:    str z1, [sp]
+; CHECK-NEXT:    addvl x8, sp, #1
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
-; CHECK-NEXT:    mov z0.h, w8
-; CHECK-NEXT:    addvl x8, sp, #1
+; CHECK-NEXT:    dupm z0.h, #0x8000
 ; CHECK-NEXT:    st1h { z0.d }, p1, [sp, #1, mul vl]
 ; CHECK-NEXT:    ldr z1, [sp]
 ; CHECK-NEXT:    str z1, [sp, #1, mul vl]
@@ -105,11 +103,10 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
-; CHECK-NEXT:    uunpklo z0.s, z1.h
-; CHECK-NEXT:    mov z1.h, w8
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    uzp1 z0.h, z1.h, z0.h
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
 ; CHECK-NEXT:    fmov s0, s2
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
index 4ae7ac7b292e9..897ade00320db 100644
--- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -454,18 +454,17 @@ declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
 define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
 ; CHECK-LABEL: test_signed_v2f16_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
-; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    mov z2.d, #0xffffffff80000000
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.d, #0xffffffff80000000
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.d, #0x7fffffff
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.d, #0x7fffffff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    fcvtzs z1.d, p1/m, z0.h
-; CHECK-NEXT:    sel z0.d, p2, z2.d, z1.d
+; CHECK-NEXT:    sel z0.d, p1, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
@@ -475,18 +474,17 @@ define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
 define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
-; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    mov z2.s, #0x80000000
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.s, #0x80000000
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.s, #0x7fffffff
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    fcvtzs z2.s, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.s, #0x7fffffff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    fcvtzs z1.s, p1/m, z0.h
-; CHECK-NEXT:    sel z0.s, p2, z2.s, z1.s
+; CHECK-NEXT:    sel z0.s, p1, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
@@ -496,26 +494,25 @@ define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
 define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z1.s, z0.h
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpklo z2.s, z0.h
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpkhi z0.s, z0.h
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    mov z3.s, #0x80000000
 ; CHECK-NEXT:    mov z4.s, #0x80000000
 ; CHECK-NEXT:    mov z5.h, w8
-; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.s, #0x7fffffff
+; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.s, #0x7fffffff
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z0.h, z5.h
-; CHECK-NEXT:    fcvtzs z3.s, p1/m, z1.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z5.h
+; CHECK-NEXT:    fcvtzs z3.s, p1/m, z2.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z2.h, z5.h
 ; CHECK-NEXT:    fcvtzs z4.s, p2/m, z0.h
-; CHECK-NEXT:    fcmuo p2.h, p0/z, z1.h, z1.h
+; CHECK-NEXT:    fcmuo p2.h, p0/z, z2.h, z2.h
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.s, p1, z2.s, z3.s
-; CHECK-NEXT:    sel z1.s, p3, z2.s, z4.s
+; CHECK-NEXT:    sel z0.s, p1, z1.s, z3.s
+; CHECK-NEXT:    sel z1.s, p3, z1.s, z4.s
 ; CHECK-NEXT:    mov z0.s, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
@@ -526,18 +523,17 @@ define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
 define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #63488 // =0xf800
+; CHECK-NEXT:    dupm z1.h, #0xf800
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z2.s, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #30719 // =0x77ff
+; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.h, w8
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.s, #32767 // =0x7fff
-; CHECK-NEXT:    fcvtzs z2.s, p1/m, z0.h
+; CHECK-NEXT:    mov z1.s, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcvtzs z1.s, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.s, #32767 // =0x7fff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.s, p2, z1.s, z2.s
+; CHECK-NEXT:    sel z0.s, p1, z2.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
@@ -547,18 +543,17 @@ define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
 define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #63488 // =0xf800
+; CHECK-NEXT:    dupm z1.h, #0xf800
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z2.h, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #30719 // =0x77ff
+; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.h, w8
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.h, #32767 // =0x7fff
-; CHECK-NEXT:    fcvtzs z2.h, p1/m, z0.h
+; CHECK-NEXT:    mov z1.h, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcvtzs z1.h, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.h, p2, z1.h, z2.h
+; CHECK-NEXT:    sel z0.h, p1, z2.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
@@ -568,18 +563,17 @@ define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
 define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
 ; CHECK-LABEL: test_signed_v2f16_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
-; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.d, #0x8000000000000000
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    fcvtzs z1.d, p1/m, z0.h
-; CHECK-NEXT:    sel z0.d, p2, z2.d, z1.d
+; CHECK-NEXT:    sel z0.d, p1, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
@@ -589,26 +583,25 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
 define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpklo z2.d, z0.s
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z4.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.h, w8
-; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z0.h, z5.h
-; CHECK-NEXT:    fcvtzs z3.d, p1/m, z1.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z5.h
+; CHECK-NEXT:    fcvtzs z3.d, p1/m, z2.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z2.h, z5.h
 ; CHECK-NEXT:    fcvtzs z4.d, p2/m, z0.h
-; CHECK-NEXT:    fcmuo p2.h, p0/z, z1.h, z1.h
+; CHECK-NEXT:    fcmuo p2.h, p0/z, z2.h, z2.h
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.d, p1, z2.d, z3.d
-; CHECK-NEXT:    sel z1.d, p3, z2.d, z4.d
+; CHECK-NEXT:    sel z0.d, p1, z1.d, z3.d
+; CHECK-NEXT:    sel z1.d, p3, z1.d, z4.d
 ; CHECK-NEXT:    mov z0.d, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll
index f964d70e0a05c..c2bb0c81ab405 100644
--- a/llvm/test/CodeGen/AArch64/sve-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll
@@ -5,9 +5,8 @@ define <vscale x 1 x i64> @llrint_v1i64_v1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: llrint_v1i64_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -28,9 +27,8 @@ define <vscale x 2 x i64> @llrint_v1i64_v2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: llrint_v1i64_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -52,10 +50,9 @@ define <vscale x 4 x i64> @llrint_v4i64_v4f16(<vscale x 4 x half> %x) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z4.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x7fffffffffffffff
@@ -92,10 +89,9 @@ define <vscale x 8 x i64> @llrint_v8i64_v8f16(<vscale x 8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    uunpkhi z0.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z4.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z4.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z7.d, #0x8000000000000000
@@ -162,12 +158,13 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z2.s, z0.h
 ; CHECK-NEXT:    uunpkhi z3.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpklo z7.s, z1.h
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    uunpkhi z1.s, z1.h
-; CHECK-NEXT:    mov z0.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
+; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z31.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z4.d, z2.s
 ; CHECK-NEXT:    uunpklo z24.d, z3.s
@@ -175,10 +172,8 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    uunpkhi z6.d, z2.s
 ; CHECK-NEXT:    uunpklo z26.d, z7.s
 ; CHECK-NEXT:    uunpkhi z7.d, z7.s
-; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z30.d, z1.s
-; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z1.d, z1.s
 ; CHECK-NEXT:    movprfx z27, z4
@@ -191,17 +186,17 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    frintx z26.h, p0/m, z26.h
 ; CHECK-NEXT:    frintx z7.h, p0/m, z7.h
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z2.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z2.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z2.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z2.h
-; CHECK-NEXT:    fcvtzs z0.d, p1/m, z27.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z0.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z0.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z0.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z0.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z0.h
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z27.h
 ; CHECK-NEXT:    fcvtzs z4.d, p3/m, z24.h
 ; CHECK-NEXT:    fcvtzs z5.d, p4/m, z25.h
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z27.h, z29.h
 ; CHECK-NEXT:    fcvtzs z3.d, p2/m, z28.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z0.h
 ; CHECK-NEXT:    fcvtzs z6.d, p5/m, z26.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z27.h, z27.h
 ; CHECK-NEXT:    movprfx z27, z30
@@ -212,7 +207,7 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z28.h, z28.h
 ; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z31.d, p4/m, z7.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z0.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z24.h, z29.h
 ; CHECK-NEXT:    fcmuo p7.h, p0/z, z24.h, z24.h
 ; CHECK-NEXT:    mov z24.d, #0x7fffffffffffffff
@@ -221,31 +216,31 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z25.h, z25.h
 ; CHECK-NEXT:    mov z25.d, #0x8000000000000000
 ; CHECK-NEXT:    sel z1.d, p5, z24.d, z3.d
-; CHECK-NEXT:    mov z0.d, p3/m, z24.d
 ; CHECK-NEXT:    sel z3.d, p8, z24.d, z5.d
-; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z0.h
+; CHECK-NEXT:    sel z0.d, p3, z24.d, z2.d
 ; CHECK-NEXT:    sel z2.d, p6, z24.d, z4.d
-; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z3.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p10, [sp, #1, mul vl] // 2-byte Reload
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    mov z2.d, p7/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p7, [sp, #4, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    fcvtzs z25.d, p4/m, z30.h
+; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    fcmgt p5.h, p0/z, z7.h, z29.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    sel z4.d, p9, z24.d, z6.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z7.h, z7.h
 ; CHECK-NEXT:    sel z5.d, p5, z24.d, z31.d
 ; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    sel z6.d, p6, z24.d, z28.d
 ; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p9.h, p0/z, z27.h, z27.h
+; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    sel z7.d, p4, z24.d, z25.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    mov z5.d, p8/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p8, [sp, #3, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z30.h, z30.h
@@ -302,48 +297,47 @@ define <vscale x 32 x i64> @llrint_v32i64_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16
 ; CHECK-NEXT:    uunpklo z4.s, z0.h
-; CHECK-NEXT:    uunpkhi z5.s, z0.h
-; CHECK-NEXT:    mov w9, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z6.s, z1.h
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    uunpkhi z28.s, z1.h
-; CHECK-NEXT:    mov z30.h, w9
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
 ; CHECK-NEXT:    mov w9, #31743 // =0x7bff
+; CHECK-NEXT:    uunpklo z5.s, z1.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z28.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpkhi z29.s, z1.h
+; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z13.s, z2.h
 ; CHECK-NEXT:    mov z9.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z14.s, z2.h
 ; CHECK-NEXT:    uunpkhi z17.s, z3.h
-; CHECK-NEXT:    uunpklo z7.d, z4.s
+; CHECK-NEXT:    uunpklo z6.d, z4.s
 ; CHECK-NEXT:    uunpkhi z4.d, z4.s
-; CHECK-NEXT:    uunpklo z27.d, z5.s
-; CHECK-NEXT:    uunpklo z31.d, z6.s
-; CHECK-NEXT:    uunpkhi z8.d, z6.s
-; CHECK-NEXT:    uunpkhi z29.d, z5.s
-; CHECK-NEXT:    uunpkhi z11.d, z28.s
-; CHECK-NEXT:    uunpklo z10.d, z28.s
+; CHECK-NEXT:    uunpklo z27.d, z0.s
+; CHECK-NEXT:    uunpklo z31.d, z5.s
+; CHECK-NEXT:    uunpkhi z8.d, z5.s
+; CHECK-NEXT:    uunpkhi z30.d, z0.s
+; CHECK-NEXT:    uunpkhi z11.d, z29.s
+; CHECK-NEXT:    uunpklo z10.d, z29.s
 ; CHECK-NEXT:    uunpklo z15.s, z3.h
 ; CHECK-NEXT:    uunpklo z16.d, z14.s
 ; CHECK-NEXT:    uunpkhi z14.d, z14.s
 ; CHECK-NEXT:    mov z24.d, #0x8000000000000000
-; CHECK-NEXT:    movprfx z1, z7
-; CHECK-NEXT:    frintx z1.h, p0/m, z7.h
 ; CHECK-NEXT:    movprfx z5, z27
 ; CHECK-NEXT:    frintx z5.h, p0/m, z27.h
+; CHECK-NEXT:    movprfx z1, z6
+; CHECK-NEXT:    frintx z1.h, p0/m, z6.h
 ; CHECK-NEXT:    frintx z4.h, p0/m, z4.h
 ; CHECK-NEXT:    movprfx z12, z31
 ; CHECK-NEXT:    frintx z12.h, p0/m, z31.h
 ; CHECK-NEXT:    movprfx z27, z8
 ; CHECK-NEXT:    frintx z27.h, p0/m, z8.h
-; CHECK-NEXT:    movprfx z6, z29
-; CHECK-NEXT:    frintx z6.h, p0/m, z29.h
+; CHECK-NEXT:    movprfx z6, z30
+; CHECK-NEXT:    frintx z6.h, p0/m, z30.h
 ; CHECK-NEXT:    movprfx z31, z10
 ; CHECK-NEXT:    frintx z31.h, p0/m, z10.h
-; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z8.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
 ; CHECK-NEXT:    movprfx z3, z16
 ; CHECK-NEXT:    frintx z3.h, p0/m, z16.h
-; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    mov z29.h, w9
+; CHECK-NEXT:    mov z30.h, w9
 ; CHECK-NEXT:    uunpklo z10.d, z13.s
 ; CHECK-NEXT:    uunpkhi z13.d, z13.s
 ; CHECK-NEXT:    uunpkhi z20.d, z15.s
@@ -354,124 +348,124 @@ define <vscale x 32 x i64> @llrint_v32i64_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    uunpklo z15.d, z15.s
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z21.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    mov z26.d, #0x8000000000000000
-; CHECK-NEXT:    mov z28.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z29.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    movprfx z19, z13
 ; CHECK-NEXT:    frintx z19.h, p0/m, z13.h
 ; CHECK-NEXT:    movprfx z13, z14
 ; CHECK-NEXT:    frintx z13.h, p0/m, z14.h
-; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    frintx z16.h, p0/m, z16.h
 ; CHECK-NEXT:    mov z22.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z23.d, #0x8000000000000000
-; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
 ; CHECK-NEXT:    mov z14.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z30.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z30.h
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z29.h
+; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z28.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z28.h
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z30.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z12.h, z12.h
 ; CHECK-NEXT:    fcvtzs z7.d, p4/m, z4.h
 ; CHECK-NEXT:    fcvtzs z8.d, p2/m, z12.h
 ; CHECK-NEXT:    mov z12.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z28.h
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z11.h, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z30.h
-; CHECK-NEXT:    mov z8.d, p9/m, z28.d
+; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z28.h
+; CHECK-NEXT:    mov z8.d, p9/m, z29.d
 ; CHECK-NEXT:    fcvtzs z9.d, p4/m, z27.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z24.d, p3/m, z5.h
 ; CHECK-NEXT:    mov z8.d, p8/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z30.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z30.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z28.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z28.h
 ; CHECK-NEXT:    str z8, [x8, #4, mul vl]
 ; CHECK-NEXT:    fcvtzs z12.d, p4/m, z11.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    uunpkhi z11.d, z17.s
 ; CHECK-NEXT:    movprfx z17, z20
 ; CHECK-NEXT:    frintx z17.h, p0/m, z20.h
 ; CHECK-NEXT:    fcvtzs z25.d, p1/m, z6.h
 ; CHECK-NEXT:    mov z20.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z0.d, p5/m, z1.h
-; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z28.h
 ; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z30.h
-; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z30.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z28.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z28.h
 ; CHECK-NEXT:    fcvtzs z18.d, p6/m, z10.h
-; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z29.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z2.d, p3/m, z31.h
 ; CHECK-NEXT:    fcvtzs z21.d, p1/m, z13.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z30.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z28.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z28.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z10.h, z10.h
-; CHECK-NEXT:    sel z10.d, p4, z28.d, z12.d
-; CHECK-NEXT:    sel z12.d, p11, z28.d, z18.d
+; CHECK-NEXT:    sel z10.d, p4, z29.d, z12.d
+; CHECK-NEXT:    sel z12.d, p11, z29.d, z18.d
 ; CHECK-NEXT:    fcvtzs z26.d, p5/m, z11.h
 ; CHECK-NEXT:    fcvtzs z22.d, p2/m, z17.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    fcvtzs z23.d, p3/m, z16.h
 ; CHECK-NEXT:    mov z10.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    mov z12.d, p1/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z28.h
 ; CHECK-NEXT:    str z10, [x8, #7, mul vl]
-; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z30.h
+; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z28.h
 ; CHECK-NEXT:    str z12, [x8, #8, mul vl]
-; CHECK-NEXT:    mov z26.d, p4/m, z28.d
-; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z30.h
-; CHECK-NEXT:    mov z30.d, #0x8000000000000000
+; CHECK-NEXT:    mov z26.d, p4/m, z29.d
+; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z28.h
+; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z14.d, p6/m, z19.h
-; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z29.h
+; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z30.h
 ; CHECK-NEXT:    fcvtzs z20.d, p7/m, z3.h
-; CHECK-NEXT:    fcvtzs z30.d, p2/m, z15.h
+; CHECK-NEXT:    fcvtzs z28.d, p2/m, z15.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z11.h, z11.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z16.h, z16.h
-; CHECK-NEXT:    sel z11.d, p5, z28.d, z23.d
-; CHECK-NEXT:    sel z16.d, p3, z28.d, z22.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z29.h
+; CHECK-NEXT:    sel z11.d, p5, z29.d, z23.d
+; CHECK-NEXT:    sel z16.d, p3, z29.d, z22.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z30.h
 ; CHECK-NEXT:    mov z26.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z11.d, p2/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z30.h
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z17.h, z17.h
 ; CHECK-NEXT:    str z26, [x8, #15, mul vl]
-; CHECK-NEXT:    sel z26.d, p4, z28.d, z14.d
+; CHECK-NEXT:    sel z26.d, p4, z29.d, z14.d
 ; CHECK-NEXT:    str z11, [x8, #14, mul vl]
-; CHECK-NEXT:    mov z30.d, p3/m, z28.d
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z29.h
+; CHECK-NEXT:    mov z28.d, p3/m, z29.d
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z30.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z13.h, z13.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z3.h, z3.h
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z21.d
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z21.d
 ; CHECK-NEXT:    mov z16.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    sel z11.d, p2, z28.d, z20.d
+; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    sel z11.d, p2, z29.d, z20.d
 ; CHECK-NEXT:    str z16, [x8, #13, mul vl]
 ; CHECK-NEXT:    mov z3.d, p4/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z15.h, z15.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z30.h
 ; CHECK-NEXT:    mov z11.d, p3/m, #0 // =0x0
-; CHECK-NEXT:    mov z9.d, p12/m, z28.d
+; CHECK-NEXT:    mov z9.d, p12/m, z29.d
 ; CHECK-NEXT:    str z3, [x8, #11, mul vl]
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z19.h, z19.h
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z29.h
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z30.h
 ; CHECK-NEXT:    str z11, [x8, #10, mul vl]
-; CHECK-NEXT:    mov z30.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z7.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z29.h
+; CHECK-NEXT:    mov z28.d, p6/m, #0 // =0x0
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z7.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z30.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z27.h, z27.h
-; CHECK-NEXT:    str z30, [x8, #12, mul vl]
+; CHECK-NEXT:    str z28, [x8, #12, mul vl]
 ; CHECK-NEXT:    mov z26.d, p5/m, #0 // =0x0
-; CHECK-NEXT:    sel z7.d, p2, z28.d, z24.d
-; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z29.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z29.h
+; CHECK-NEXT:    sel z7.d, p2, z29.d, z24.d
+; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z30.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z30.h
 ; CHECK-NEXT:    str z26, [x8, #9, mul vl]
-; CHECK-NEXT:    sel z24.d, p4, z28.d, z25.d
+; CHECK-NEXT:    sel z24.d, p4, z29.d, z25.d
 ; CHECK-NEXT:    mov z9.d, p3/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z31.h, z31.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z6.h, z6.h
-; CHECK-NEXT:    mov z2.d, p6/m, z28.d
+; CHECK-NEXT:    mov z2.d, p6/m, z29.d
 ; CHECK-NEXT:    str z9, [x8, #5, mul vl]
-; CHECK-NEXT:    mov z0.d, p1/m, z28.d
+; CHECK-NEXT:    mov z0.d, p1/m, z29.d
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z5.h, z5.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z4.h, z4.h
 ; CHECK-NEXT:    mov z2.d, p5/m, #0 // =0x0

diff  --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll
index f517e7fe8dc16..f1224d30d53cc 100644
--- a/llvm/test/CodeGen/AArch64/sve-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll
@@ -6,9 +6,8 @@ define <vscale x 1 x iXLen> @lrint_v1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: lrint_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -29,9 +28,8 @@ define <vscale x 2 x iXLen> @lrint_v2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: lrint_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -53,10 +51,9 @@ define <vscale x 4 x iXLen> @lrint_v4f16(<vscale x 4 x half> %x) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z4.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x7fffffffffffffff
@@ -93,10 +90,9 @@ define <vscale x 8 x iXLen> @lrint_v8f16(<vscale x 8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    uunpkhi z0.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z4.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z4.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z7.d, #0x8000000000000000
@@ -163,12 +159,13 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z2.s, z0.h
 ; CHECK-NEXT:    uunpkhi z3.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpklo z7.s, z1.h
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    uunpkhi z1.s, z1.h
-; CHECK-NEXT:    mov z0.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
+; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z31.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z4.d, z2.s
 ; CHECK-NEXT:    uunpklo z24.d, z3.s
@@ -176,10 +173,8 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    uunpkhi z6.d, z2.s
 ; CHECK-NEXT:    uunpklo z26.d, z7.s
 ; CHECK-NEXT:    uunpkhi z7.d, z7.s
-; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z30.d, z1.s
-; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z1.d, z1.s
 ; CHECK-NEXT:    movprfx z27, z4
@@ -192,17 +187,17 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    frintx z26.h, p0/m, z26.h
 ; CHECK-NEXT:    frintx z7.h, p0/m, z7.h
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z2.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z2.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z2.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z2.h
-; CHECK-NEXT:    fcvtzs z0.d, p1/m, z27.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z0.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z0.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z0.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z0.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z0.h
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z27.h
 ; CHECK-NEXT:    fcvtzs z4.d, p3/m, z24.h
 ; CHECK-NEXT:    fcvtzs z5.d, p4/m, z25.h
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z27.h, z29.h
 ; CHECK-NEXT:    fcvtzs z3.d, p2/m, z28.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z0.h
 ; CHECK-NEXT:    fcvtzs z6.d, p5/m, z26.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z27.h, z27.h
 ; CHECK-NEXT:    movprfx z27, z30
@@ -213,7 +208,7 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z28.h, z28.h
 ; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z31.d, p4/m, z7.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z0.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z24.h, z29.h
 ; CHECK-NEXT:    fcmuo p7.h, p0/z, z24.h, z24.h
 ; CHECK-NEXT:    mov z24.d, #0x7fffffffffffffff
@@ -222,31 +217,31 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z25.h, z25.h
 ; CHECK-NEXT:    mov z25.d, #0x8000000000000000
 ; CHECK-NEXT:    sel z1.d, p5, z24.d, z3.d
-; CHECK-NEXT:    mov z0.d, p3/m, z24.d
 ; CHECK-NEXT:    sel z3.d, p8, z24.d, z5.d
-; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z0.h
+; CHECK-NEXT:    sel z0.d, p3, z24.d, z2.d
 ; CHECK-NEXT:    sel z2.d, p6, z24.d, z4.d
-; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z3.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p10, [sp, #1, mul vl] // 2-byte Reload
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    mov z2.d, p7/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p7, [sp, #4, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    fcvtzs z25.d, p4/m, z30.h
+; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    fcmgt p5.h, p0/z, z7.h, z29.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    sel z4.d, p9, z24.d, z6.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z7.h, z7.h
 ; CHECK-NEXT:    sel z5.d, p5, z24.d, z31.d
 ; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    sel z6.d, p6, z24.d, z28.d
 ; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p9.h, p0/z, z27.h, z27.h
+; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    sel z7.d, p4, z24.d, z25.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    mov z5.d, p8/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p8, [sp, #3, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z30.h, z30.h
@@ -303,48 +298,47 @@ define <vscale x 32 x iXLen> @lrint_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16
 ; CHECK-NEXT:    uunpklo z4.s, z0.h
-; CHECK-NEXT:    uunpkhi z5.s, z0.h
-; CHECK-NEXT:    mov w9, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z6.s, z1.h
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    uunpkhi z28.s, z1.h
-; CHECK-NEXT:    mov z30.h, w9
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
 ; CHECK-NEXT:    mov w9, #31743 // =0x7bff
+; CHECK-NEXT:    uunpklo z5.s, z1.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z28.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpkhi z29.s, z1.h
+; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z13.s, z2.h
 ; CHECK-NEXT:    mov z9.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z14.s, z2.h
 ; CHECK-NEXT:    uunpkhi z17.s, z3.h
-; CHECK-NEXT:    uunpklo z7.d, z4.s
+; CHECK-NEXT:    uunpklo z6.d, z4.s
 ; CHECK-NEXT:    uunpkhi z4.d, z4.s
-; CHECK-NEXT:    uunpklo z27.d, z5.s
-; CHECK-NEXT:    uunpklo z31.d, z6.s
-; CHECK-NEXT:    uunpkhi z8.d, z6.s
-; CHECK-NEXT:    uunpkhi z29.d, z5.s
-; CHECK-NEXT:    uunpkhi z11.d, z28.s
-; CHECK-NEXT:    uunpklo z10.d, z28.s
+; CHECK-NEXT:    uunpklo z27.d, z0.s
+; CHECK-NEXT:    uunpklo z31.d, z5.s
+; CHECK-NEXT:    uunpkhi z8.d, z5.s
+; CHECK-NEXT:    uunpkhi z30.d, z0.s
+; CHECK-NEXT:    uunpkhi z11.d, z29.s
+; CHECK-NEXT:    uunpklo z10.d, z29.s
 ; CHECK-NEXT:    uunpklo z15.s, z3.h
 ; CHECK-NEXT:    uunpklo z16.d, z14.s
 ; CHECK-NEXT:    uunpkhi z14.d, z14.s
 ; CHECK-NEXT:    mov z24.d, #0x8000000000000000
-; CHECK-NEXT:    movprfx z1, z7
-; CHECK-NEXT:    frintx z1.h, p0/m, z7.h
 ; CHECK-NEXT:    movprfx z5, z27
 ; CHECK-NEXT:    frintx z5.h, p0/m, z27.h
+; CHECK-NEXT:    movprfx z1, z6
+; CHECK-NEXT:    frintx z1.h, p0/m, z6.h
 ; CHECK-NEXT:    frintx z4.h, p0/m, z4.h
 ; CHECK-NEXT:    movprfx z12, z31
 ; CHECK-NEXT:    frintx z12.h, p0/m, z31.h
 ; CHECK-NEXT:    movprfx z27, z8
 ; CHECK-NEXT:    frintx z27.h, p0/m, z8.h
-; CHECK-NEXT:    movprfx z6, z29
-; CHECK-NEXT:    frintx z6.h, p0/m, z29.h
+; CHECK-NEXT:    movprfx z6, z30
+; CHECK-NEXT:    frintx z6.h, p0/m, z30.h
 ; CHECK-NEXT:    movprfx z31, z10
 ; CHECK-NEXT:    frintx z31.h, p0/m, z10.h
-; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z8.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
 ; CHECK-NEXT:    movprfx z3, z16
 ; CHECK-NEXT:    frintx z3.h, p0/m, z16.h
-; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    mov z29.h, w9
+; CHECK-NEXT:    mov z30.h, w9
 ; CHECK-NEXT:    uunpklo z10.d, z13.s
 ; CHECK-NEXT:    uunpkhi z13.d, z13.s
 ; CHECK-NEXT:    uunpkhi z20.d, z15.s
@@ -355,124 +349,124 @@ define <vscale x 32 x iXLen> @lrint_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    uunpklo z15.d, z15.s
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z21.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    mov z26.d, #0x8000000000000000
-; CHECK-NEXT:    mov z28.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z29.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    movprfx z19, z13
 ; CHECK-NEXT:    frintx z19.h, p0/m, z13.h
 ; CHECK-NEXT:    movprfx z13, z14
 ; CHECK-NEXT:    frintx z13.h, p0/m, z14.h
-; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    frintx z16.h, p0/m, z16.h
 ; CHECK-NEXT:    mov z22.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z23.d, #0x8000000000000000
-; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
 ; CHECK-NEXT:    mov z14.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z30.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z30.h
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z29.h
+; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z28.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z28.h
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z30.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z12.h, z12.h
 ; CHECK-NEXT:    fcvtzs z7.d, p4/m, z4.h
 ; CHECK-NEXT:    fcvtzs z8.d, p2/m, z12.h
 ; CHECK-NEXT:    mov z12.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z28.h
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z11.h, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z30.h
-; CHECK-NEXT:    mov z8.d, p9/m, z28.d
+; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z28.h
+; CHECK-NEXT:    mov z8.d, p9/m, z29.d
 ; CHECK-NEXT:    fcvtzs z9.d, p4/m, z27.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z24.d, p3/m, z5.h
 ; CHECK-NEXT:    mov z8.d, p8/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z30.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z30.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z28.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z28.h
 ; CHECK-NEXT:    str z8, [x8, #4, mul vl]
 ; CHECK-NEXT:    fcvtzs z12.d, p4/m, z11.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    uunpkhi z11.d, z17.s
 ; CHECK-NEXT:    movprfx z17, z20
 ; CHECK-NEXT:    frintx z17.h, p0/m, z20.h
 ; CHECK-NEXT:    fcvtzs z25.d, p1/m, z6.h
 ; CHECK-NEXT:    mov z20.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z0.d, p5/m, z1.h
-; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z28.h
 ; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z30.h
-; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z30.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z28.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z28.h
 ; CHECK-NEXT:    fcvtzs z18.d, p6/m, z10.h
-; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z29.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z2.d, p3/m, z31.h
 ; CHECK-NEXT:    fcvtzs z21.d, p1/m, z13.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z30.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z28.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z28.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z10.h, z10.h
-; CHECK-NEXT:    sel z10.d, p4, z28.d, z12.d
-; CHECK-NEXT:    sel z12.d, p11, z28.d, z18.d
+; CHECK-NEXT:    sel z10.d, p4, z29.d, z12.d
+; CHECK-NEXT:    sel z12.d, p11, z29.d, z18.d
 ; CHECK-NEXT:    fcvtzs z26.d, p5/m, z11.h
 ; CHECK-NEXT:    fcvtzs z22.d, p2/m, z17.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    fcvtzs z23.d, p3/m, z16.h
 ; CHECK-NEXT:    mov z10.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    mov z12.d, p1/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z28.h
 ; CHECK-NEXT:    str z10, [x8, #7, mul vl]
-; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z30.h
+; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z28.h
 ; CHECK-NEXT:    str z12, [x8, #8, mul vl]
-; CHECK-NEXT:    mov z26.d, p4/m, z28.d
-; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z30.h
-; CHECK-NEXT:    mov z30.d, #0x8000000000000000
+; CHECK-NEXT:    mov z26.d, p4/m, z29.d
+; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z28.h
+; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z14.d, p6/m, z19.h
-; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z29.h
+; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z30.h
 ; CHECK-NEXT:    fcvtzs z20.d, p7/m, z3.h
-; CHECK-NEXT:    fcvtzs z30.d, p2/m, z15.h
+; CHECK-NEXT:    fcvtzs z28.d, p2/m, z15.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z11.h, z11.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z16.h, z16.h
-; CHECK-NEXT:    sel z11.d, p5, z28.d, z23.d
-; CHECK-NEXT:    sel z16.d, p3, z28.d, z22.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z29.h
+; CHECK-NEXT:    sel z11.d, p5, z29.d, z23.d
+; CHECK-NEXT:    sel z16.d, p3, z29.d, z22.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z30.h
 ; CHECK-NEXT:    mov z26.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z11.d, p2/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z30.h
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z17.h, z17.h
 ; CHECK-NEXT:    str z26, [x8, #15, mul vl]
-; CHECK-NEXT:    sel z26.d, p4, z28.d, z14.d
+; CHECK-NEXT:    sel z26.d, p4, z29.d, z14.d
 ; CHECK-NEXT:    str z11, [x8, #14, mul vl]
-; CHECK-NEXT:    mov z30.d, p3/m, z28.d
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z29.h
+; CHECK-NEXT:    mov z28.d, p3/m, z29.d
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z30.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z13.h, z13.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z3.h, z3.h
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z21.d
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z21.d
 ; CHECK-NEXT:    mov z16.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    sel z11.d, p2, z28.d, z20.d
+; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    sel z11.d, p2, z29.d, z20.d
 ; CHECK-NEXT:    str z16, [x8, #13, mul vl]
 ; CHECK-NEXT:    mov z3.d, p4/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z15.h, z15.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z30.h
 ; CHECK-NEXT:    mov z11.d, p3/m, #0 // =0x0
-; CHECK-NEXT:    mov z9.d, p12/m, z28.d
+; CHECK-NEXT:    mov z9.d, p12/m, z29.d
 ; CHECK-NEXT:    str z3, [x8, #11, mul vl]
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z19.h, z19.h
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z29.h
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z30.h
 ; CHECK-NEXT:    str z11, [x8, #10, mul vl]
-; CHECK-NEXT:    mov z30.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z7.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z29.h
+; CHECK-NEXT:    mov z28.d, p6/m, #0 // =0x0
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z7.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z30.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z27.h, z27.h
-; CHECK-NEXT:    str z30, [x8, #12, mul vl]
+; CHECK-NEXT:    str z28, [x8, #12, mul vl]
 ; CHECK-NEXT:    mov z26.d, p5/m, #0 // =0x0
-; CHECK-NEXT:    sel z7.d, p2, z28.d, z24.d
-; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z29.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z29.h
+; CHECK-NEXT:    sel z7.d, p2, z29.d, z24.d
+; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z30.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z30.h
 ; CHECK-NEXT:    str z26, [x8, #9, mul vl]
-; CHECK-NEXT:    sel z24.d, p4, z28.d, z25.d
+; CHECK-NEXT:    sel z24.d, p4, z29.d, z25.d
 ; CHECK-NEXT:    mov z9.d, p3/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z31.h, z31.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z6.h, z6.h
-; CHECK-NEXT:    mov z2.d, p6/m, z28.d
+; CHECK-NEXT:    mov z2.d, p6/m, z29.d
 ; CHECK-NEXT:    str z9, [x8, #5, mul vl]
-; CHECK-NEXT:    mov z0.d, p1/m, z28.d
+; CHECK-NEXT:    mov z0.d, p1/m, z29.d
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z5.h, z5.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z4.h, z4.h
 ; CHECK-NEXT:    mov z2.d, p5/m, #0 // =0x0

diff  --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
index 5cca5539048b5..1ceaa5ad27734 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -509,6 +509,294 @@ define <vscale x 2 x bfloat> @splat_nxv2bf16_imm() {
   ret <vscale x 2 x bfloat> splat(bfloat 1.0)
 }
 
+define <vscale x 2 x half> @splat_nzero_nxv2f16() {
+; CHECK-LABEL: splat_nzero_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half -0.0)
+}
+
+define <vscale x 4 x half> @splat_nzero_nxv4f16() {
+; CHECK-LABEL: splat_nzero_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half -0.0)
+}
+
+define <vscale x 8 x half> @splat_nzero_nxv8f16() {
+; CHECK-LABEL: splat_nzero_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half -0.0)
+}
+
+define <vscale x 2 x float> @splat_nzero_nxv2f32() {
+; CHECK-LABEL: splat_nzero_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x80000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float -0.0)
+}
+
+define <vscale x 4 x float> @splat_nzero_nxv4f32() {
+; CHECK-LABEL: splat_nzero_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x80000000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float -0.0)
+}
+
+define <vscale x 2 x double> @splat_nzero_nxv2f64() {
+; CHECK-LABEL: splat_nzero_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0x8000000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double -0.0)
+}
+
+define <vscale x 2 x bfloat> @splat_nzero_nxv2bf16() {
+; CHECK-LABEL: splat_nzero_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat -0.0)
+}
+
+define <vscale x 4 x bfloat> @splat_nzero_nxv4bf16() {
+; CHECK-LABEL: splat_nzero_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat -0.0)
+}
+
+define <vscale x 8 x bfloat> @splat_nzero_nxv8bf16() {
+; CHECK-LABEL: splat_nzero_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat -0.0)
+}
+
+define <vscale x 2 x half> @splat_pinf_nxv2f16() {
+; CHECK-LABEL: splat_pinf_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7c00
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half 0x7FF0000000000000)
+}
+
+define <vscale x 4 x half> @splat_pinf_nxv4f16() {
+; CHECK-LABEL: splat_pinf_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7c00
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half 0x7FF0000000000000)
+}
+
+define <vscale x 8 x half> @splat_pinf_nxv8f16() {
+; CHECK-LABEL: splat_pinf_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7c00
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half 0x7FF0000000000000)
+}
+
+define <vscale x 2 x float> @splat_pinf_nxv2f32() {
+; CHECK-LABEL: splat_pinf_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7f800000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float 0x7FF0000000000000)
+}
+
+define <vscale x 4 x float> @splat_pinf_nxv4f32() {
+; CHECK-LABEL: splat_pinf_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7f800000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float 0x7FF0000000000000)
+}
+
+define <vscale x 2 x double> @splat_pinf_nxv2f64() {
+; CHECK-LABEL: splat_pinf_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0x7ff0000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double 0x7FF0000000000000)
+}
+
+define <vscale x 2 x bfloat> @splat_pinf_nxv2bf16() {
+; CHECK-LABEL: splat_pinf_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32640 // =0x7f80
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat 0x7FF0000000000000)
+}
+
+define <vscale x 4 x bfloat> @splat_pinf_nxv4bf16() {
+; CHECK-LABEL: splat_pinf_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32640 // =0x7f80
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat 0x7FF0000000000000)
+}
+
+define <vscale x 8 x bfloat> @splat_pinf_nxv8bf16() {
+; CHECK-LABEL: splat_pinf_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32640 // =0x7f80
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat 0x7FF0000000000000)
+}
+
+define <vscale x 2 x half> @splat_ninf_nxv2f16() {
+; CHECK-LABEL: splat_ninf_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xfc00
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half 0xFFF0000000000000)
+}
+
+define <vscale x 4 x half> @splat_ninf_nxv4f16() {
+; CHECK-LABEL: splat_ninf_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xfc00
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half 0xFFF0000000000000)
+}
+
+define <vscale x 8 x half> @splat_ninf_nxv8f16() {
+; CHECK-LABEL: splat_ninf_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xfc00
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half 0xFFF0000000000000)
+}
+
+define <vscale x 2 x float> @splat_ninf_nxv2f32() {
+; CHECK-LABEL: splat_ninf_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0xff800000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float 0xFFF0000000000000)
+}
+
+define <vscale x 4 x float> @splat_ninf_nxv4f32() {
+; CHECK-LABEL: splat_ninf_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0xff800000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float 0xFFF0000000000000)
+}
+
+define <vscale x 2 x double> @splat_ninf_nxv2f64() {
+; CHECK-LABEL: splat_ninf_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0xfff0000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double 0xFFF0000000000000)
+}
+
+define <vscale x 2 x bfloat> @splat_ninf_nxv2bf16() {
+; CHECK-LABEL: splat_ninf_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xff80
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat 0xFFF0000000000000)
+}
+
+define <vscale x 4 x bfloat> @splat_ninf_nxv4bf16() {
+; CHECK-LABEL: splat_ninf_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xff80
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat 0xFFF0000000000000)
+}
+
+define <vscale x 8 x bfloat> @splat_ninf_nxv8bf16() {
+; CHECK-LABEL: splat_ninf_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xff80
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat 0xFFF0000000000000)
+}
+
+define <vscale x 2 x half> @splat_nan_nxv2f16() {
+; CHECK-LABEL: splat_nan_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7e00
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half 0x7FF8000000000000)
+}
+
+define <vscale x 4 x half> @splat_nan_nxv4f16() {
+; CHECK-LABEL: splat_nan_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7e00
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half 0x7FF8000000000000)
+}
+
+define <vscale x 8 x half> @splat_nan_nxv8f16() {
+; CHECK-LABEL: splat_nan_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7e00
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half 0x7FF8000000000000)
+}
+
+define <vscale x 2 x float> @splat_nan_nxv2f32() {
+; CHECK-LABEL: splat_nan_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7fc00000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float 0x7FF8000000000000)
+}
+
+define <vscale x 4 x float> @splat_nan_nxv4f32() {
+; CHECK-LABEL: splat_nan_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7fc00000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float 0x7FF8000000000000)
+}
+
+define <vscale x 2 x double> @splat_nan_nxv2f64() {
+; CHECK-LABEL: splat_nan_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0x7ff8000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double 0x7FF8000000000000)
+}
+
+define <vscale x 2 x bfloat> @splat_nan_nxv2bf16() {
+; CHECK-LABEL: splat_nan_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32704 // =0x7fc0
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat 0x7FF8000000000000)
+}
+
+define <vscale x 4 x bfloat> @splat_nan_nxv4bf16() {
+; CHECK-LABEL: splat_nan_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32704 // =0x7fc0
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat 0x7FF8000000000000)
+}
+
+define <vscale x 8 x bfloat> @splat_nan_nxv8bf16() {
+; CHECK-LABEL: splat_nan_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32704 // =0x7fc0
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat 0x7FF8000000000000)
+}
+
 define <vscale x 4 x i32> @splat_nxv4i32_fold(<vscale x 4 x i32> %x) {
 ; CHECK-LABEL: splat_nxv4i32_fold:
 ; CHECK:       // %bb.0:
@@ -581,8 +869,8 @@ define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
 ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    adrp x8, .LCPI60_0
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI60_0
+; CHECK-NEXT:    adrp x8, .LCPI96_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI96_0
 ; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   ret <vscale x 2 x double> splat(double 3.33)

diff  --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
index 6b5b3d6d436cb..b04029c273ae2 100644
--- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
@@ -338,8 +338,7 @@ ret <vscale x 2 x double> %sel
 define <vscale x 8 x half> @sel_merge_nxv8f16_negative_zero(<vscale x 8 x i1> %p, <vscale x 8 x half> %in) {
 ; CHECK-LABEL: sel_merge_nxv8f16_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    dupm z1.h, #0x8000
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 8 x i1> %p, <vscale x 8 x half> splat (half -0.0), <vscale x 8 x half> %in
@@ -349,8 +348,7 @@ ret <vscale x 8 x half> %sel
 define <vscale x 4 x half> @sel_merge_nx4f16_negative_zero(<vscale x 4 x i1> %p, <vscale x 4 x half> %in) {
 ; CHECK-LABEL: sel_merge_nx4f16_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    dupm z1.h, #0x8000
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 4 x i1> %p, <vscale x 4 x half> splat (half -0.0), <vscale x 4 x half> %in
@@ -360,8 +358,7 @@ ret <vscale x 4 x half> %sel
 define <vscale x 2 x half> @sel_merge_nx2f16_negative_zero(<vscale x 2 x i1> %p, <vscale x 2 x half> %in) {
 ; CHECK-LABEL: sel_merge_nx2f16_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    dupm z1.h, #0x8000
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 2 x i1> %p, <vscale x 2 x half> splat (half -0.0), <vscale x 2 x half> %in
@@ -371,8 +368,7 @@ ret <vscale x 2 x half> %sel
 define <vscale x 4 x float> @sel_merge_nx4f32_negative_zero(<vscale x 4 x i1> %p, <vscale x 4 x float> %in) {
 ; CHECK-LABEL: sel_merge_nx4f32_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
-; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov z1.s, #0x80000000
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> splat (float -0.0), <vscale x 4 x float> %in
@@ -382,8 +378,7 @@ ret <vscale x 4 x float> %sel
 define <vscale x 2 x float> @sel_merge_nx2f32_negative_zero(<vscale x 2 x i1> %p, <vscale x 2 x float> %in) {
 ; CHECK-LABEL: sel_merge_nx2f32_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
-; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov z1.s, #0x80000000
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 2 x i1> %p, <vscale x 2 x float> splat (float -0.0), <vscale x 2 x float> %in
@@ -393,8 +388,7 @@ ret <vscale x 2 x float> %sel
 define <vscale x 2 x double> @sel_merge_nx2f64_negative_zero(<vscale x 2 x i1> %p, <vscale x 2 x double> %in) {
 ; CHECK-LABEL: sel_merge_nx2f64_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z1.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> splat (double -0.0), <vscale x 2 x double> %in


        


More information about the llvm-commits mailing list