[llvm] a11509c - [AArch64][GlobalISel] Scalarize i128/fp128 vector loads/stores.

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 8 00:50:08 PDT 2024


Author: David Green
Date: 2024-10-08T08:50:03+01:00
New Revision: a11509c3b971132785024c830073fd11d14bd3fd

URL: https://github.com/llvm/llvm-project/commit/a11509c3b971132785024c830073fd11d14bd3fd
DIFF: https://github.com/llvm/llvm-project/commit/a11509c3b971132785024c830073fd11d14bd3fd.diff

LOG: [AArch64][GlobalISel] Scalarize i128/fp128 vector loads/stores.

As with other operations, vector loads and stores whose elements are wider than
64 bits (e.g. i128/fp128) are now scalarized so each element legalizes on its own.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
    llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
    llvm/test/CodeGen/AArch64/load.ll
    llvm/test/CodeGen/AArch64/store.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 3d313ca00f1259..a69894839361bc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -422,7 +422,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                    return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
                  })
       .customIf(IsPtrVecPred)
-      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
+      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
 
   StoreActions
       .customIf([=](const LegalityQuery &Query) {
@@ -463,7 +464,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                    return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
                  })
       .customIf(IsPtrVecPred)
-      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
+      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
 
   getActionDefinitionsBuilder(G_INDEXED_STORE)
       // Idx 0 == Ptr, Idx 1 == Val

diff  --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 2d568e858c36b7..29a9082173ea51 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -8,7 +8,6 @@
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_signed_v4f16_v4i50
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_signed_v8f16_v8i19
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_signed_v8f16_v8i50
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_signed_v8f16_v8i128
 
 ;
 ; Float to signed 32-bit -- Vector size variation
@@ -4496,183 +4495,269 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 }
 
 define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
-; CHECK-LABEL: test_signed_v8f16_v8i128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #192
-; CHECK-NEXT:    str d10, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x28, x27, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #128] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #160] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #176] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 192
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w25, -56
-; CHECK-NEXT:    .cfi_offset w26, -64
-; CHECK-NEXT:    .cfi_offset w27, -72
-; CHECK-NEXT:    .cfi_offset w28, -80
-; CHECK-NEXT:    .cfi_offset w30, -88
-; CHECK-NEXT:    .cfi_offset w29, -96
-; CHECK-NEXT:    .cfi_offset b8, -104
-; CHECK-NEXT:    .cfi_offset b9, -112
-; CHECK-NEXT:    .cfi_offset b10, -128
-; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    mov x19, x8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    movi v9.2s, #255, lsl #24
-; CHECK-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    fmov s10, w8
-; CHECK-NEXT:    mov x22, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    mov x23, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x22, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x8, xzr, x8, vs
-; CHECK-NEXT:    str x8, [sp, #72] // 8-byte Folded Spill
-; CHECK-NEXT:    csel x8, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x22, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x10, xzr, x8, vs
-; CHECK-NEXT:    csel x8, xzr, x9, vs
-; CHECK-NEXT:    stp x8, x10, [sp, #8] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x22, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x8, xzr, x8, vs
-; CHECK-NEXT:    str x8, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    csel x8, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    str x8, [sp] // 8-byte Folded Spill
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x22, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x28, xzr, x8, vs
-; CHECK-NEXT:    csel x29, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x22, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x20, xzr, x8, vs
-; CHECK-NEXT:    csel x21, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x22, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x24, xzr, x8, vs
-; CHECK-NEXT:    csel x25, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x22, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csel x26, xzr, x8, vs
-; CHECK-NEXT:    csel x27, xzr, x9, vs
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    stp x26, x27, [x19, #32]
-; CHECK-NEXT:    stp x24, x25, [x19, #16]
-; CHECK-NEXT:    stp x20, x21, [x19]
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, x22, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    stp x28, x29, [x19, #112]
-; CHECK-NEXT:    csel x9, x23, x9, gt
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x9, xzr, x9, vs
-; CHECK-NEXT:    csel x8, xzr, x8, vs
-; CHECK-NEXT:    stp x8, x9, [x19, #48]
-; CHECK-NEXT:    ldr x8, [sp] // 8-byte Folded Reload
-; CHECK-NEXT:    str x8, [x19, #104]
-; CHECK-NEXT:    ldr x8, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    str x8, [x19, #96]
-; CHECK-NEXT:    ldr x8, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    str x8, [x19, #88]
-; CHECK-NEXT:    ldr x8, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    str x8, [x19, #80]
-; CHECK-NEXT:    ldr x8, [sp, #24] // 8-byte Folded Reload
-; CHECK-NEXT:    str x8, [x19, #72]
-; CHECK-NEXT:    ldr x8, [sp, #72] // 8-byte Folded Reload
-; CHECK-NEXT:    str x8, [x19, #64]
-; CHECK-NEXT:    ldp x20, x19, [sp, #176] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr d10, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #160] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x24, x23, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x26, x25, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x28, x27, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x29, x30, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #192
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_signed_v8f16_v8i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #192
+; CHECK-SD-NEXT:    str d10, [sp, #64] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x28, x27, [sp, #112] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x26, x25, [sp, #128] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #144] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #160] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #176] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 192
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w25, -56
+; CHECK-SD-NEXT:    .cfi_offset w26, -64
+; CHECK-SD-NEXT:    .cfi_offset w27, -72
+; CHECK-SD-NEXT:    .cfi_offset w28, -80
+; CHECK-SD-NEXT:    .cfi_offset w30, -88
+; CHECK-SD-NEXT:    .cfi_offset w29, -96
+; CHECK-SD-NEXT:    .cfi_offset b8, -104
+; CHECK-SD-NEXT:    .cfi_offset b9, -112
+; CHECK-SD-NEXT:    .cfi_offset b10, -128
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    mov x19, x8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
+; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mov x22, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    mov x23, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x22, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x8, xzr, x8, vs
+; CHECK-SD-NEXT:    str x8, [sp, #72] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x22, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x10, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
+; CHECK-SD-NEXT:    stp x8, x10, [sp, #8] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x22, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x8, xzr, x8, vs
+; CHECK-SD-NEXT:    str x8, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    str x8, [sp] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x22, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x28, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x29, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x22, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x20, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x21, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x22, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x24, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x25, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x22, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csel x26, xzr, x8, vs
+; CHECK-SD-NEXT:    csel x27, xzr, x9, vs
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixsfti
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    stp x26, x27, [x19, #32]
+; CHECK-SD-NEXT:    stp x24, x25, [x19, #16]
+; CHECK-SD-NEXT:    stp x20, x21, [x19]
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    csel x9, x22, x1, lt
+; CHECK-SD-NEXT:    fcmp s8, s10
+; CHECK-SD-NEXT:    stp x28, x29, [x19, #112]
+; CHECK-SD-NEXT:    csel x9, x23, x9, gt
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    fcmp s8, s8
+; CHECK-SD-NEXT:    csel x9, xzr, x9, vs
+; CHECK-SD-NEXT:    csel x8, xzr, x8, vs
+; CHECK-SD-NEXT:    stp x8, x9, [x19, #48]
+; CHECK-SD-NEXT:    ldr x8, [sp] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    str x8, [x19, #104]
+; CHECK-SD-NEXT:    ldr x8, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    str x8, [x19, #96]
+; CHECK-SD-NEXT:    ldr x8, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    str x8, [x19, #88]
+; CHECK-SD-NEXT:    ldr x8, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    str x8, [x19, #80]
+; CHECK-SD-NEXT:    ldr x8, [sp, #24] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    str x8, [x19, #72]
+; CHECK-SD-NEXT:    ldr x8, [sp, #72] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    str x8, [x19, #64]
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #176] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr d10, [sp, #64] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #160] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #144] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x26, x25, [sp, #128] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x28, x27, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x29, x30, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #192
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i128:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-CVT-NEXT:    mov h4, v0.h[4]
+; CHECK-GI-CVT-NEXT:    fcvt s5, h0
+; CHECK-GI-CVT-NEXT:    mov h6, v0.h[5]
+; CHECK-GI-CVT-NEXT:    mov h7, v0.h[6]
+; CHECK-GI-CVT-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvt s3, h3
+; CHECK-GI-CVT-NEXT:    fcvtzs x9, s5
+; CHECK-GI-CVT-NEXT:    fcvt s4, h4
+; CHECK-GI-CVT-NEXT:    fcvt s5, h6
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzs x10, s1
+; CHECK-GI-CVT-NEXT:    fcvt s1, h7
+; CHECK-GI-CVT-NEXT:    fcvtzs x11, s2
+; CHECK-GI-CVT-NEXT:    fcvtzs x12, s3
+; CHECK-GI-CVT-NEXT:    mov v2.d[0], x9
+; CHECK-GI-CVT-NEXT:    fcvtzs x9, s4
+; CHECK-GI-CVT-NEXT:    mov v3.d[0], x10
+; CHECK-GI-CVT-NEXT:    fcvtzs x10, s5
+; CHECK-GI-CVT-NEXT:    mov v4.d[0], x11
+; CHECK-GI-CVT-NEXT:    fcvtzs x11, s1
+; CHECK-GI-CVT-NEXT:    mov v1.d[0], x12
+; CHECK-GI-CVT-NEXT:    fcvtzs x12, s0
+; CHECK-GI-CVT-NEXT:    mov v0.d[0], x9
+; CHECK-GI-CVT-NEXT:    mov v2.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v5.d[0], x10
+; CHECK-GI-CVT-NEXT:    mov v3.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v4.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v6.d[0], x11
+; CHECK-GI-CVT-NEXT:    mov v7.d[0], x12
+; CHECK-GI-CVT-NEXT:    mov v1.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v0.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v5.d[1], xzr
+; CHECK-GI-CVT-NEXT:    stp q2, q3, [x8]
+; CHECK-GI-CVT-NEXT:    mov v6.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v7.d[1], xzr
+; CHECK-GI-CVT-NEXT:    stp q4, q1, [x8, #32]
+; CHECK-GI-CVT-NEXT:    stp q0, q5, [x8, #64]
+; CHECK-GI-CVT-NEXT:    stp q6, q7, [x8, #96]
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i128:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT:    mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT:    fcvtzs x9, h0
+; CHECK-GI-FP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT:    fcvtzs x10, h1
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[6]
+; CHECK-GI-FP16-NEXT:    fcvtzs x11, h2
+; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-FP16-NEXT:    fcvtzs x12, h3
+; CHECK-GI-FP16-NEXT:    mov v2.d[0], x9
+; CHECK-GI-FP16-NEXT:    fcvtzs x9, h4
+; CHECK-GI-FP16-NEXT:    mov v3.d[0], x10
+; CHECK-GI-FP16-NEXT:    fcvtzs x10, h5
+; CHECK-GI-FP16-NEXT:    mov v4.d[0], x11
+; CHECK-GI-FP16-NEXT:    fcvtzs x11, h1
+; CHECK-GI-FP16-NEXT:    mov v1.d[0], x12
+; CHECK-GI-FP16-NEXT:    fcvtzs x12, h0
+; CHECK-GI-FP16-NEXT:    mov v0.d[0], x9
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v5.d[0], x10
+; CHECK-GI-FP16-NEXT:    mov v3.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v4.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v6.d[0], x11
+; CHECK-GI-FP16-NEXT:    mov v7.d[0], x12
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v5.d[1], xzr
+; CHECK-GI-FP16-NEXT:    stp q2, q3, [x8]
+; CHECK-GI-FP16-NEXT:    mov v6.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v7.d[1], xzr
+; CHECK-GI-FP16-NEXT:    stp q4, q1, [x8, #32]
+; CHECK-GI-FP16-NEXT:    stp q0, q5, [x8, #64]
+; CHECK-GI-FP16-NEXT:    stp q6, q7, [x8, #96]
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half> %f)
     ret <8 x i128> %x
 }

diff  --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index f63fba9dab6c63..046ec0d0790296 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -8,7 +8,6 @@
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_unsigned_v4f16_v4i50
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i19
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i50
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i128
 
 ;
 ; Float to unsigned 32-bit -- Vector size variation
@@ -3729,147 +3728,233 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 }
 
 define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) {
-; CHECK-LABEL: test_unsigned_v8f16_v8i128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #176
-; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 176
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
-; CHECK-NEXT:    .cfi_offset w22, -32
-; CHECK-NEXT:    .cfi_offset w23, -40
-; CHECK-NEXT:    .cfi_offset w24, -48
-; CHECK-NEXT:    .cfi_offset w25, -56
-; CHECK-NEXT:    .cfi_offset w26, -64
-; CHECK-NEXT:    .cfi_offset w27, -72
-; CHECK-NEXT:    .cfi_offset w28, -80
-; CHECK-NEXT:    .cfi_offset w30, -88
-; CHECK-NEXT:    .cfi_offset w29, -96
-; CHECK-NEXT:    .cfi_offset b8, -104
-; CHECK-NEXT:    .cfi_offset b9, -112
-; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    mov x19, x8
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x10, x8, xzr, le
-; CHECK-NEXT:    csinv x8, x9, xzr, le
-; CHECK-NEXT:    stp x8, x10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    stp x8, x9, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x24, x9, xzr, le
-; CHECK-NEXT:    csinv x25, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x26, x9, xzr, le
-; CHECK-NEXT:    csinv x27, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[1]
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x28, x9, xzr, le
-; CHECK-NEXT:    csinv x29, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[2]
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x20, x9, xzr, le
-; CHECK-NEXT:    csinv x21, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[3]
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    fcvt s8, h0
-; CHECK-NEXT:    csinv x22, x9, xzr, le
-; CHECK-NEXT:    csinv x23, x8, xzr, le
-; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    stp x22, x23, [x19, #32]
-; CHECK-NEXT:    stp x20, x21, [x19, #16]
-; CHECK-NEXT:    stp x28, x29, [x19]
-; CHECK-NEXT:    csel x8, xzr, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    stp x26, x27, [x19, #112]
-; CHECK-NEXT:    stp x24, x25, [x19, #96]
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    stp x9, x8, [x19, #48]
-; CHECK-NEXT:    ldr x8, [sp] // 8-byte Folded Reload
-; CHECK-NEXT:    str x8, [x19, #88]
-; CHECK-NEXT:    ldr x8, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    str x8, [x19, #80]
-; CHECK-NEXT:    ldr x8, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    str x8, [x19, #72]
-; CHECK-NEXT:    ldr x8, [sp, #24] // 8-byte Folded Reload
-; CHECK-NEXT:    str x8, [x19, #64]
-; CHECK-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x29, x30, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #176
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_unsigned_v8f16_v8i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub sp, sp, #176
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 176
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
+; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
+; CHECK-SD-NEXT:    .cfi_offset w24, -48
+; CHECK-SD-NEXT:    .cfi_offset w25, -56
+; CHECK-SD-NEXT:    .cfi_offset w26, -64
+; CHECK-SD-NEXT:    .cfi_offset w27, -72
+; CHECK-SD-NEXT:    .cfi_offset w28, -80
+; CHECK-SD-NEXT:    .cfi_offset w30, -88
+; CHECK-SD-NEXT:    .cfi_offset w29, -96
+; CHECK-SD-NEXT:    .cfi_offset b8, -104
+; CHECK-SD-NEXT:    .cfi_offset b9, -112
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    mov x19, x8
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    fmov s9, w8
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x10, x8, xzr, le
+; CHECK-SD-NEXT:    csinv x8, x9, xzr, le
+; CHECK-SD-NEXT:    stp x8, x10, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    stp x8, x9, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x24, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x25, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x26, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x27, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x28, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x29, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x20, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x21, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    fcvt s8, h0
+; CHECK-SD-NEXT:    csinv x22, x9, xzr, le
+; CHECK-SD-NEXT:    csinv x23, x8, xzr, le
+; CHECK-SD-NEXT:    fmov s0, s8
+; CHECK-SD-NEXT:    bl __fixunssfti
+; CHECK-SD-NEXT:    fcmp s8, #0.0
+; CHECK-SD-NEXT:    stp x22, x23, [x19, #32]
+; CHECK-SD-NEXT:    stp x20, x21, [x19, #16]
+; CHECK-SD-NEXT:    stp x28, x29, [x19]
+; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
+; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
+; CHECK-SD-NEXT:    fcmp s8, s9
+; CHECK-SD-NEXT:    stp x26, x27, [x19, #112]
+; CHECK-SD-NEXT:    stp x24, x25, [x19, #96]
+; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
+; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
+; CHECK-SD-NEXT:    stp x9, x8, [x19, #48]
+; CHECK-SD-NEXT:    ldr x8, [sp] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    str x8, [x19, #88]
+; CHECK-SD-NEXT:    ldr x8, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    str x8, [x19, #80]
+; CHECK-SD-NEXT:    ldr x8, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    str x8, [x19, #72]
+; CHECK-SD-NEXT:    ldr x8, [sp, #24] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    str x8, [x19, #64]
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x29, x30, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #176
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i128:
+; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-CVT-NEXT:    mov h4, v0.h[4]
+; CHECK-GI-CVT-NEXT:    fcvt s5, h0
+; CHECK-GI-CVT-NEXT:    mov h6, v0.h[5]
+; CHECK-GI-CVT-NEXT:    mov h7, v0.h[6]
+; CHECK-GI-CVT-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-CVT-NEXT:    fcvt s1, h1
+; CHECK-GI-CVT-NEXT:    fcvt s2, h2
+; CHECK-GI-CVT-NEXT:    fcvt s3, h3
+; CHECK-GI-CVT-NEXT:    fcvtzu x9, s5
+; CHECK-GI-CVT-NEXT:    fcvt s4, h4
+; CHECK-GI-CVT-NEXT:    fcvt s5, h6
+; CHECK-GI-CVT-NEXT:    fcvt s0, h0
+; CHECK-GI-CVT-NEXT:    fcvtzu x10, s1
+; CHECK-GI-CVT-NEXT:    fcvt s1, h7
+; CHECK-GI-CVT-NEXT:    fcvtzu x11, s2
+; CHECK-GI-CVT-NEXT:    fcvtzu x12, s3
+; CHECK-GI-CVT-NEXT:    mov v2.d[0], x9
+; CHECK-GI-CVT-NEXT:    fcvtzu x9, s4
+; CHECK-GI-CVT-NEXT:    mov v3.d[0], x10
+; CHECK-GI-CVT-NEXT:    fcvtzu x10, s5
+; CHECK-GI-CVT-NEXT:    mov v4.d[0], x11
+; CHECK-GI-CVT-NEXT:    fcvtzu x11, s1
+; CHECK-GI-CVT-NEXT:    mov v1.d[0], x12
+; CHECK-GI-CVT-NEXT:    fcvtzu x12, s0
+; CHECK-GI-CVT-NEXT:    mov v0.d[0], x9
+; CHECK-GI-CVT-NEXT:    mov v2.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v5.d[0], x10
+; CHECK-GI-CVT-NEXT:    mov v3.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v4.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v6.d[0], x11
+; CHECK-GI-CVT-NEXT:    mov v7.d[0], x12
+; CHECK-GI-CVT-NEXT:    mov v1.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v0.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v5.d[1], xzr
+; CHECK-GI-CVT-NEXT:    stp q2, q3, [x8]
+; CHECK-GI-CVT-NEXT:    mov v6.d[1], xzr
+; CHECK-GI-CVT-NEXT:    mov v7.d[1], xzr
+; CHECK-GI-CVT-NEXT:    stp q4, q1, [x8, #32]
+; CHECK-GI-CVT-NEXT:    stp q0, q5, [x8, #64]
+; CHECK-GI-CVT-NEXT:    stp q6, q7, [x8, #96]
+; CHECK-GI-CVT-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i128:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT:    mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT:    fcvtzu x9, h0
+; CHECK-GI-FP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT:    fcvtzu x10, h1
+; CHECK-GI-FP16-NEXT:    mov h1, v0.h[6]
+; CHECK-GI-FP16-NEXT:    fcvtzu x11, h2
+; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-FP16-NEXT:    fcvtzu x12, h3
+; CHECK-GI-FP16-NEXT:    mov v2.d[0], x9
+; CHECK-GI-FP16-NEXT:    fcvtzu x9, h4
+; CHECK-GI-FP16-NEXT:    mov v3.d[0], x10
+; CHECK-GI-FP16-NEXT:    fcvtzu x10, h5
+; CHECK-GI-FP16-NEXT:    mov v4.d[0], x11
+; CHECK-GI-FP16-NEXT:    fcvtzu x11, h1
+; CHECK-GI-FP16-NEXT:    mov v1.d[0], x12
+; CHECK-GI-FP16-NEXT:    fcvtzu x12, h0
+; CHECK-GI-FP16-NEXT:    mov v0.d[0], x9
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v5.d[0], x10
+; CHECK-GI-FP16-NEXT:    mov v3.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v4.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v6.d[0], x11
+; CHECK-GI-FP16-NEXT:    mov v7.d[0], x12
+; CHECK-GI-FP16-NEXT:    mov v1.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v0.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v5.d[1], xzr
+; CHECK-GI-FP16-NEXT:    stp q2, q3, [x8]
+; CHECK-GI-FP16-NEXT:    mov v6.d[1], xzr
+; CHECK-GI-FP16-NEXT:    mov v7.d[1], xzr
+; CHECK-GI-FP16-NEXT:    stp q4, q1, [x8, #32]
+; CHECK-GI-FP16-NEXT:    stp q0, q5, [x8, #64]
+; CHECK-GI-FP16-NEXT:    stp q6, q7, [x8, #96]
+; CHECK-GI-FP16-NEXT:    ret
     %x = call <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half> %f)
     ret <8 x i128> %x
 }

diff  --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll
index a4d1c53c272aa1..167e9d1c196435 100644
--- a/llvm/test/CodeGen/AArch64/load.ll
+++ b/llvm/test/CodeGen/AArch64/load.ll
@@ -315,3 +315,34 @@ define <3 x i32> @load_v3i32(ptr %ptr){
     %a = load <3 x i32>, ptr %ptr
     ret <3 x i32> %a
 }
+
+define <2 x i128> @load_v2i128(ptr %p) {
+; CHECK-SD-LABEL: load_v2i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ldp x8, x1, [x0]
+; CHECK-SD-NEXT:    ldp x2, x3, [x0, #16]
+; CHECK-SD-NEXT:    mov x0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: load_v2i128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldp q0, q1, [x0]
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    mov d3, v1.d[1]
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    fmov x1, d2
+; CHECK-GI-NEXT:    fmov x3, d3
+; CHECK-GI-NEXT:    ret
+    %a = load <2 x i128>, ptr %p
+    ret <2 x i128> %a
+}
+
+define <2 x fp128> @load_v2f128(ptr %p) {
+; CHECK-LABEL: load_v2f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+    %a = load <2 x fp128>, ptr %p
+    ret <2 x fp128> %a
+}

diff  --git a/llvm/test/CodeGen/AArch64/store.ll b/llvm/test/CodeGen/AArch64/store.ll
index bf22d79a4df994..86d74b69f4958f 100644
--- a/llvm/test/CodeGen/AArch64/store.ll
+++ b/llvm/test/CodeGen/AArch64/store.ll
@@ -340,3 +340,31 @@ define void @store_v3i32(<3 x i32> %a, ptr %ptr){
     store <3 x i32> %a, ptr %ptr
     ret void
 }
+
+define void @store_v2i128(<2 x i128> %a, ptr %p) {
+; CHECK-SD-LABEL: store_v2i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    stp x2, x3, [x4, #16]
+; CHECK-SD-NEXT:    stp x0, x1, [x4]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: store_v2i128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov v0.d[0], x0
+; CHECK-GI-NEXT:    mov v1.d[0], x2
+; CHECK-GI-NEXT:    mov v0.d[1], x1
+; CHECK-GI-NEXT:    mov v1.d[1], x3
+; CHECK-GI-NEXT:    stp q0, q1, [x4]
+; CHECK-GI-NEXT:    ret
+    store <2 x i128> %a, ptr %p
+    ret void
+}
+
+define void @store_v2f128(<2 x fp128> %a, ptr %p) {
+; CHECK-LABEL: store_v2f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp q0, q1, [x0]
+; CHECK-NEXT:    ret
+    store <2 x fp128> %a, ptr %p
+    ret void
+}


        


More information about the llvm-commits mailing list