[llvm] a11509c - [AArch64][GlobalISel] Scalarize i128/fp128 vector loads/stores.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 8 00:50:08 PDT 2024
Author: David Green
Date: 2024-10-08T08:50:03+01:00
New Revision: a11509c3b971132785024c830073fd11d14bd3fd
URL: https://github.com/llvm/llvm-project/commit/a11509c3b971132785024c830073fd11d14bd3fd
DIFF: https://github.com/llvm/llvm-project/commit/a11509c3b971132785024c830073fd11d14bd3fd.diff
LOG: [AArch64][GlobalISel] Scalarize i128/fp128 vector loads/stores.
Similar to other operations, we should scalarize vector loads and stores whose
elements are wider than 64 bits (such as i128 and fp128), so that the individual
elements can then be legalized.
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
llvm/test/CodeGen/AArch64/load.ll
llvm/test/CodeGen/AArch64/store.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 3d313ca00f1259..a69894839361bc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -422,7 +422,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
})
.customIf(IsPtrVecPred)
- .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
+ .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
StoreActions
.customIf([=](const LegalityQuery &Query) {
@@ -463,7 +464,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
})
.customIf(IsPtrVecPred)
- .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
+ .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
getActionDefinitionsBuilder(G_INDEXED_STORE)
// Idx 0 == Ptr, Idx 1 == Val
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 2d568e858c36b7..29a9082173ea51 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -8,7 +8,6 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v4f16_v4i50
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v8f16_v8i19
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v8f16_v8i50
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v8f16_v8i128
;
; Float to signed 32-bit -- Vector size variation
@@ -4496,183 +4495,269 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
}
define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
-; CHECK-LABEL: test_signed_v8f16_v8i128:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #192
-; CHECK-NEXT: str d10, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: stp x28, x27, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #128] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 192
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w23, -40
-; CHECK-NEXT: .cfi_offset w24, -48
-; CHECK-NEXT: .cfi_offset w25, -56
-; CHECK-NEXT: .cfi_offset w26, -64
-; CHECK-NEXT: .cfi_offset w27, -72
-; CHECK-NEXT: .cfi_offset w28, -80
-; CHECK-NEXT: .cfi_offset w30, -88
-; CHECK-NEXT: .cfi_offset w29, -96
-; CHECK-NEXT: .cfi_offset b8, -104
-; CHECK-NEXT: .cfi_offset b9, -112
-; CHECK-NEXT: .cfi_offset b10, -128
-; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov x19, x8
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: movi v9.2s, #255, lsl #24
-; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: fmov s10, w8
-; CHECK-NEXT: mov x22, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT: mov x23, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, x22, x1, lt
-; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: csel x9, x23, x9, gt
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x8, xzr, x8, vs
-; CHECK-NEXT: str x8, [sp, #72] // 8-byte Folded Spill
-; CHECK-NEXT: csel x8, xzr, x9, vs
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: mov h0, v0.h[2]
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, x22, x1, lt
-; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: csel x9, x23, x9, gt
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x10, xzr, x8, vs
-; CHECK-NEXT: csel x8, xzr, x9, vs
-; CHECK-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, x22, x1, lt
-; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: csel x9, x23, x9, gt
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x8, xzr, x8, vs
-; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: csel x8, xzr, x9, vs
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, x22, x1, lt
-; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: csel x9, x23, x9, gt
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x28, xzr, x8, vs
-; CHECK-NEXT: csel x29, xzr, x9, vs
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, x22, x1, lt
-; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: csel x9, x23, x9, gt
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x20, xzr, x8, vs
-; CHECK-NEXT: csel x21, xzr, x9, vs
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: mov h0, v0.h[2]
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, x22, x1, lt
-; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: csel x9, x23, x9, gt
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x24, xzr, x8, vs
-; CHECK-NEXT: csel x25, xzr, x9, vs
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, x22, x1, lt
-; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: csel x9, x23, x9, gt
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x26, xzr, x8, vs
-; CHECK-NEXT: csel x27, xzr, x9, vs
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: stp x26, x27, [x19, #32]
-; CHECK-NEXT: stp x24, x25, [x19, #16]
-; CHECK-NEXT: stp x20, x21, [x19]
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, x22, x1, lt
-; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: stp x28, x29, [x19, #112]
-; CHECK-NEXT: csel x9, x23, x9, gt
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: csel x9, xzr, x9, vs
-; CHECK-NEXT: csel x8, xzr, x8, vs
-; CHECK-NEXT: stp x8, x9, [x19, #48]
-; CHECK-NEXT: ldr x8, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: str x8, [x19, #104]
-; CHECK-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: str x8, [x19, #96]
-; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: str x8, [x19, #88]
-; CHECK-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: str x8, [x19, #80]
-; CHECK-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload
-; CHECK-NEXT: str x8, [x19, #72]
-; CHECK-NEXT: ldr x8, [sp, #72] // 8-byte Folded Reload
-; CHECK-NEXT: str x8, [x19, #64]
-; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x28, x27, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #192
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_signed_v8f16_v8i128:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #192
+; CHECK-SD-NEXT: str d10, [sp, #64] // 8-byte Folded Spill
+; CHECK-SD-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x28, x27, [sp, #112] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x26, x25, [sp, #128] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x24, x23, [sp, #144] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 192
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w20, -16
+; CHECK-SD-NEXT: .cfi_offset w21, -24
+; CHECK-SD-NEXT: .cfi_offset w22, -32
+; CHECK-SD-NEXT: .cfi_offset w23, -40
+; CHECK-SD-NEXT: .cfi_offset w24, -48
+; CHECK-SD-NEXT: .cfi_offset w25, -56
+; CHECK-SD-NEXT: .cfi_offset w26, -64
+; CHECK-SD-NEXT: .cfi_offset w27, -72
+; CHECK-SD-NEXT: .cfi_offset w28, -80
+; CHECK-SD-NEXT: .cfi_offset w30, -88
+; CHECK-SD-NEXT: .cfi_offset w29, -96
+; CHECK-SD-NEXT: .cfi_offset b8, -104
+; CHECK-SD-NEXT: .cfi_offset b9, -112
+; CHECK-SD-NEXT: .cfi_offset b10, -128
+; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: mov x19, x8
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: movi v9.2s, #255, lsl #24
+; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: fmov s10, w8
+; CHECK-SD-NEXT: mov x22, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT: mov x23, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, x22, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s10
+; CHECK-SD-NEXT: csel x9, x23, x9, gt
+; CHECK-SD-NEXT: csinv x8, x8, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csel x8, xzr, x8, vs
+; CHECK-SD-NEXT: str x8, [sp, #72] // 8-byte Folded Spill
+; CHECK-SD-NEXT: csel x8, xzr, x9, vs
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: str x8, [sp, #24] // 8-byte Folded Spill
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov h0, v0.h[2]
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, x22, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s10
+; CHECK-SD-NEXT: csel x9, x23, x9, gt
+; CHECK-SD-NEXT: csinv x8, x8, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csel x10, xzr, x8, vs
+; CHECK-SD-NEXT: csel x8, xzr, x9, vs
+; CHECK-SD-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov h0, v0.h[3]
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, x22, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s10
+; CHECK-SD-NEXT: csel x9, x23, x9, gt
+; CHECK-SD-NEXT: csinv x8, x8, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csel x8, xzr, x8, vs
+; CHECK-SD-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: csel x8, xzr, x9, vs
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: str x8, [sp] // 8-byte Folded Spill
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, x22, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s10
+; CHECK-SD-NEXT: csel x9, x23, x9, gt
+; CHECK-SD-NEXT: csinv x8, x8, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csel x28, xzr, x8, vs
+; CHECK-SD-NEXT: csel x29, xzr, x9, vs
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, x22, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s10
+; CHECK-SD-NEXT: csel x9, x23, x9, gt
+; CHECK-SD-NEXT: csinv x8, x8, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csel x20, xzr, x8, vs
+; CHECK-SD-NEXT: csel x21, xzr, x9, vs
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov h0, v0.h[2]
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, x22, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s10
+; CHECK-SD-NEXT: csel x9, x23, x9, gt
+; CHECK-SD-NEXT: csinv x8, x8, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csel x24, xzr, x8, vs
+; CHECK-SD-NEXT: csel x25, xzr, x9, vs
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov h0, v0.h[3]
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, x22, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s10
+; CHECK-SD-NEXT: csel x9, x23, x9, gt
+; CHECK-SD-NEXT: csinv x8, x8, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csel x26, xzr, x8, vs
+; CHECK-SD-NEXT: csel x27, xzr, x9, vs
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixsfti
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: stp x26, x27, [x19, #32]
+; CHECK-SD-NEXT: stp x24, x25, [x19, #16]
+; CHECK-SD-NEXT: stp x20, x21, [x19]
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: csel x9, x22, x1, lt
+; CHECK-SD-NEXT: fcmp s8, s10
+; CHECK-SD-NEXT: stp x28, x29, [x19, #112]
+; CHECK-SD-NEXT: csel x9, x23, x9, gt
+; CHECK-SD-NEXT: csinv x8, x8, xzr, le
+; CHECK-SD-NEXT: fcmp s8, s8
+; CHECK-SD-NEXT: csel x9, xzr, x9, vs
+; CHECK-SD-NEXT: csel x8, xzr, x8, vs
+; CHECK-SD-NEXT: stp x8, x9, [x19, #48]
+; CHECK-SD-NEXT: ldr x8, [sp] // 8-byte Folded Reload
+; CHECK-SD-NEXT: str x8, [x19, #104]
+; CHECK-SD-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: str x8, [x19, #96]
+; CHECK-SD-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: str x8, [x19, #88]
+; CHECK-SD-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: str x8, [x19, #80]
+; CHECK-SD-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload
+; CHECK-SD-NEXT: str x8, [x19, #72]
+; CHECK-SD-NEXT: ldr x8, [sp, #72] // 8-byte Folded Reload
+; CHECK-SD-NEXT: str x8, [x19, #64]
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload
+; CHECK-SD-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x28, x27, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #192
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i128:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT: mov h3, v0.h[3]
+; CHECK-GI-CVT-NEXT: mov h4, v0.h[4]
+; CHECK-GI-CVT-NEXT: fcvt s5, h0
+; CHECK-GI-CVT-NEXT: mov h6, v0.h[5]
+; CHECK-GI-CVT-NEXT: mov h7, v0.h[6]
+; CHECK-GI-CVT-NEXT: mov h0, v0.h[7]
+; CHECK-GI-CVT-NEXT: fcvt s1, h1
+; CHECK-GI-CVT-NEXT: fcvt s2, h2
+; CHECK-GI-CVT-NEXT: fcvt s3, h3
+; CHECK-GI-CVT-NEXT: fcvtzs x9, s5
+; CHECK-GI-CVT-NEXT: fcvt s4, h4
+; CHECK-GI-CVT-NEXT: fcvt s5, h6
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: fcvtzs x10, s1
+; CHECK-GI-CVT-NEXT: fcvt s1, h7
+; CHECK-GI-CVT-NEXT: fcvtzs x11, s2
+; CHECK-GI-CVT-NEXT: fcvtzs x12, s3
+; CHECK-GI-CVT-NEXT: mov v2.d[0], x9
+; CHECK-GI-CVT-NEXT: fcvtzs x9, s4
+; CHECK-GI-CVT-NEXT: mov v3.d[0], x10
+; CHECK-GI-CVT-NEXT: fcvtzs x10, s5
+; CHECK-GI-CVT-NEXT: mov v4.d[0], x11
+; CHECK-GI-CVT-NEXT: fcvtzs x11, s1
+; CHECK-GI-CVT-NEXT: mov v1.d[0], x12
+; CHECK-GI-CVT-NEXT: fcvtzs x12, s0
+; CHECK-GI-CVT-NEXT: mov v0.d[0], x9
+; CHECK-GI-CVT-NEXT: mov v2.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v5.d[0], x10
+; CHECK-GI-CVT-NEXT: mov v3.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v4.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v6.d[0], x11
+; CHECK-GI-CVT-NEXT: mov v7.d[0], x12
+; CHECK-GI-CVT-NEXT: mov v1.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v0.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v5.d[1], xzr
+; CHECK-GI-CVT-NEXT: stp q2, q3, [x8]
+; CHECK-GI-CVT-NEXT: mov v6.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v7.d[1], xzr
+; CHECK-GI-CVT-NEXT: stp q4, q1, [x8, #32]
+; CHECK-GI-CVT-NEXT: stp q0, q5, [x8, #64]
+; CHECK-GI-CVT-NEXT: stp q6, q7, [x8, #96]
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i128:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: fcvtzs x9, h0
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: fcvtzs x10, h1
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[6]
+; CHECK-GI-FP16-NEXT: fcvtzs x11, h2
+; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-FP16-NEXT: fcvtzs x12, h3
+; CHECK-GI-FP16-NEXT: mov v2.d[0], x9
+; CHECK-GI-FP16-NEXT: fcvtzs x9, h4
+; CHECK-GI-FP16-NEXT: mov v3.d[0], x10
+; CHECK-GI-FP16-NEXT: fcvtzs x10, h5
+; CHECK-GI-FP16-NEXT: mov v4.d[0], x11
+; CHECK-GI-FP16-NEXT: fcvtzs x11, h1
+; CHECK-GI-FP16-NEXT: mov v1.d[0], x12
+; CHECK-GI-FP16-NEXT: fcvtzs x12, h0
+; CHECK-GI-FP16-NEXT: mov v0.d[0], x9
+; CHECK-GI-FP16-NEXT: mov v2.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v5.d[0], x10
+; CHECK-GI-FP16-NEXT: mov v3.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v4.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v6.d[0], x11
+; CHECK-GI-FP16-NEXT: mov v7.d[0], x12
+; CHECK-GI-FP16-NEXT: mov v1.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v0.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v5.d[1], xzr
+; CHECK-GI-FP16-NEXT: stp q2, q3, [x8]
+; CHECK-GI-FP16-NEXT: mov v6.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v7.d[1], xzr
+; CHECK-GI-FP16-NEXT: stp q4, q1, [x8, #32]
+; CHECK-GI-FP16-NEXT: stp q0, q5, [x8, #64]
+; CHECK-GI-FP16-NEXT: stp q6, q7, [x8, #96]
+; CHECK-GI-FP16-NEXT: ret
%x = call <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half> %f)
ret <8 x i128> %x
}
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index f63fba9dab6c63..046ec0d0790296 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -8,7 +8,6 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v4f16_v4i50
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i19
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i50
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i128
;
; Float to unsigned 32-bit -- Vector size variation
@@ -3729,147 +3728,233 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
}
define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) {
-; CHECK-LABEL: test_unsigned_v8f16_v8i128:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #176
-; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x29, x30, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 176
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w23, -40
-; CHECK-NEXT: .cfi_offset w24, -48
-; CHECK-NEXT: .cfi_offset w25, -56
-; CHECK-NEXT: .cfi_offset w26, -64
-; CHECK-NEXT: .cfi_offset w27, -72
-; CHECK-NEXT: .cfi_offset w28, -80
-; CHECK-NEXT: .cfi_offset w30, -88
-; CHECK-NEXT: .cfi_offset w29, -96
-; CHECK-NEXT: .cfi_offset b8, -104
-; CHECK-NEXT: .cfi_offset b9, -112
-; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov x19, x8
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: fmov s9, w8
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csinv x10, x8, xzr, le
-; CHECK-NEXT: csinv x8, x9, xzr, le
-; CHECK-NEXT: stp x8, x10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov h0, v0.h[2]
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: stp x8, x9, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csinv x24, x9, xzr, le
-; CHECK-NEXT: csinv x25, x8, xzr, le
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csinv x26, x9, xzr, le
-; CHECK-NEXT: csinv x27, x8, xzr, le
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csinv x28, x9, xzr, le
-; CHECK-NEXT: csinv x29, x8, xzr, le
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov h0, v0.h[2]
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csinv x20, x9, xzr, le
-; CHECK-NEXT: csinv x21, x8, xzr, le
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csinv x22, x9, xzr, le
-; CHECK-NEXT: csinv x23, x8, xzr, le
-; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: stp x22, x23, [x19, #32]
-; CHECK-NEXT: stp x20, x21, [x19, #16]
-; CHECK-NEXT: stp x28, x29, [x19]
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: stp x26, x27, [x19, #112]
-; CHECK-NEXT: stp x24, x25, [x19, #96]
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: stp x9, x8, [x19, #48]
-; CHECK-NEXT: ldr x8, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: str x8, [x19, #88]
-; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: str x8, [x19, #80]
-; CHECK-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: str x8, [x19, #72]
-; CHECK-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload
-; CHECK-NEXT: str x8, [x19, #64]
-; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #176
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_unsigned_v8f16_v8i128:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #176
+; CHECK-SD-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x29, x30, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 176
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w20, -16
+; CHECK-SD-NEXT: .cfi_offset w21, -24
+; CHECK-SD-NEXT: .cfi_offset w22, -32
+; CHECK-SD-NEXT: .cfi_offset w23, -40
+; CHECK-SD-NEXT: .cfi_offset w24, -48
+; CHECK-SD-NEXT: .cfi_offset w25, -56
+; CHECK-SD-NEXT: .cfi_offset w26, -64
+; CHECK-SD-NEXT: .cfi_offset w27, -72
+; CHECK-SD-NEXT: .cfi_offset w28, -80
+; CHECK-SD-NEXT: .cfi_offset w30, -88
+; CHECK-SD-NEXT: .cfi_offset w29, -96
+; CHECK-SD-NEXT: .cfi_offset b8, -104
+; CHECK-SD-NEXT: .cfi_offset b9, -112
+; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: mov x19, x8
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: fmov s9, w8
+; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: csel x9, xzr, x1, lt
+; CHECK-SD-NEXT: csel x8, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csinv x10, x8, xzr, le
+; CHECK-SD-NEXT: csinv x8, x9, xzr, le
+; CHECK-SD-NEXT: stp x8, x10, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: mov h0, v0.h[2]
+; CHECK-SD-NEXT: csel x8, xzr, x1, lt
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csinv x9, x9, xzr, le
+; CHECK-SD-NEXT: csinv x8, x8, xzr, le
+; CHECK-SD-NEXT: stp x8, x9, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: mov h0, v0.h[3]
+; CHECK-SD-NEXT: csel x8, xzr, x1, lt
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csinv x24, x9, xzr, le
+; CHECK-SD-NEXT: csinv x25, x8, xzr, le
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: csel x8, xzr, x1, lt
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csinv x26, x9, xzr, le
+; CHECK-SD-NEXT: csinv x27, x8, xzr, le
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: csel x8, xzr, x1, lt
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csinv x28, x9, xzr, le
+; CHECK-SD-NEXT: csinv x29, x8, xzr, le
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: mov h0, v0.h[2]
+; CHECK-SD-NEXT: csel x8, xzr, x1, lt
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csinv x20, x9, xzr, le
+; CHECK-SD-NEXT: csinv x21, x8, xzr, le
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: mov h0, v0.h[3]
+; CHECK-SD-NEXT: csel x8, xzr, x1, lt
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: fcvt s8, h0
+; CHECK-SD-NEXT: csinv x22, x9, xzr, le
+; CHECK-SD-NEXT: csinv x23, x8, xzr, le
+; CHECK-SD-NEXT: fmov s0, s8
+; CHECK-SD-NEXT: bl __fixunssfti
+; CHECK-SD-NEXT: fcmp s8, #0.0
+; CHECK-SD-NEXT: stp x22, x23, [x19, #32]
+; CHECK-SD-NEXT: stp x20, x21, [x19, #16]
+; CHECK-SD-NEXT: stp x28, x29, [x19]
+; CHECK-SD-NEXT: csel x8, xzr, x1, lt
+; CHECK-SD-NEXT: csel x9, xzr, x0, lt
+; CHECK-SD-NEXT: fcmp s8, s9
+; CHECK-SD-NEXT: stp x26, x27, [x19, #112]
+; CHECK-SD-NEXT: stp x24, x25, [x19, #96]
+; CHECK-SD-NEXT: csinv x8, x8, xzr, le
+; CHECK-SD-NEXT: csinv x9, x9, xzr, le
+; CHECK-SD-NEXT: stp x9, x8, [x19, #48]
+; CHECK-SD-NEXT: ldr x8, [sp] // 8-byte Folded Reload
+; CHECK-SD-NEXT: str x8, [x19, #88]
+; CHECK-SD-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: str x8, [x19, #80]
+; CHECK-SD-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: str x8, [x19, #72]
+; CHECK-SD-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload
+; CHECK-SD-NEXT: str x8, [x19, #64]
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #176
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i128:
+; CHECK-GI-CVT: // %bb.0:
+; CHECK-GI-CVT-NEXT: mov h1, v0.h[1]
+; CHECK-GI-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-GI-CVT-NEXT: mov h3, v0.h[3]
+; CHECK-GI-CVT-NEXT: mov h4, v0.h[4]
+; CHECK-GI-CVT-NEXT: fcvt s5, h0
+; CHECK-GI-CVT-NEXT: mov h6, v0.h[5]
+; CHECK-GI-CVT-NEXT: mov h7, v0.h[6]
+; CHECK-GI-CVT-NEXT: mov h0, v0.h[7]
+; CHECK-GI-CVT-NEXT: fcvt s1, h1
+; CHECK-GI-CVT-NEXT: fcvt s2, h2
+; CHECK-GI-CVT-NEXT: fcvt s3, h3
+; CHECK-GI-CVT-NEXT: fcvtzu x9, s5
+; CHECK-GI-CVT-NEXT: fcvt s4, h4
+; CHECK-GI-CVT-NEXT: fcvt s5, h6
+; CHECK-GI-CVT-NEXT: fcvt s0, h0
+; CHECK-GI-CVT-NEXT: fcvtzu x10, s1
+; CHECK-GI-CVT-NEXT: fcvt s1, h7
+; CHECK-GI-CVT-NEXT: fcvtzu x11, s2
+; CHECK-GI-CVT-NEXT: fcvtzu x12, s3
+; CHECK-GI-CVT-NEXT: mov v2.d[0], x9
+; CHECK-GI-CVT-NEXT: fcvtzu x9, s4
+; CHECK-GI-CVT-NEXT: mov v3.d[0], x10
+; CHECK-GI-CVT-NEXT: fcvtzu x10, s5
+; CHECK-GI-CVT-NEXT: mov v4.d[0], x11
+; CHECK-GI-CVT-NEXT: fcvtzu x11, s1
+; CHECK-GI-CVT-NEXT: mov v1.d[0], x12
+; CHECK-GI-CVT-NEXT: fcvtzu x12, s0
+; CHECK-GI-CVT-NEXT: mov v0.d[0], x9
+; CHECK-GI-CVT-NEXT: mov v2.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v5.d[0], x10
+; CHECK-GI-CVT-NEXT: mov v3.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v4.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v6.d[0], x11
+; CHECK-GI-CVT-NEXT: mov v7.d[0], x12
+; CHECK-GI-CVT-NEXT: mov v1.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v0.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v5.d[1], xzr
+; CHECK-GI-CVT-NEXT: stp q2, q3, [x8]
+; CHECK-GI-CVT-NEXT: mov v6.d[1], xzr
+; CHECK-GI-CVT-NEXT: mov v7.d[1], xzr
+; CHECK-GI-CVT-NEXT: stp q4, q1, [x8, #32]
+; CHECK-GI-CVT-NEXT: stp q0, q5, [x8, #64]
+; CHECK-GI-CVT-NEXT: stp q6, q7, [x8, #96]
+; CHECK-GI-CVT-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i128:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: fcvtzu x9, h0
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: fcvtzu x10, h1
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[6]
+; CHECK-GI-FP16-NEXT: fcvtzu x11, h2
+; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-FP16-NEXT: fcvtzu x12, h3
+; CHECK-GI-FP16-NEXT: mov v2.d[0], x9
+; CHECK-GI-FP16-NEXT: fcvtzu x9, h4
+; CHECK-GI-FP16-NEXT: mov v3.d[0], x10
+; CHECK-GI-FP16-NEXT: fcvtzu x10, h5
+; CHECK-GI-FP16-NEXT: mov v4.d[0], x11
+; CHECK-GI-FP16-NEXT: fcvtzu x11, h1
+; CHECK-GI-FP16-NEXT: mov v1.d[0], x12
+; CHECK-GI-FP16-NEXT: fcvtzu x12, h0
+; CHECK-GI-FP16-NEXT: mov v0.d[0], x9
+; CHECK-GI-FP16-NEXT: mov v2.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v5.d[0], x10
+; CHECK-GI-FP16-NEXT: mov v3.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v4.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v6.d[0], x11
+; CHECK-GI-FP16-NEXT: mov v7.d[0], x12
+; CHECK-GI-FP16-NEXT: mov v1.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v0.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v5.d[1], xzr
+; CHECK-GI-FP16-NEXT: stp q2, q3, [x8]
+; CHECK-GI-FP16-NEXT: mov v6.d[1], xzr
+; CHECK-GI-FP16-NEXT: mov v7.d[1], xzr
+; CHECK-GI-FP16-NEXT: stp q4, q1, [x8, #32]
+; CHECK-GI-FP16-NEXT: stp q0, q5, [x8, #64]
+; CHECK-GI-FP16-NEXT: stp q6, q7, [x8, #96]
+; CHECK-GI-FP16-NEXT: ret
%x = call <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half> %f)
ret <8 x i128> %x
}
diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll
index a4d1c53c272aa1..167e9d1c196435 100644
--- a/llvm/test/CodeGen/AArch64/load.ll
+++ b/llvm/test/CodeGen/AArch64/load.ll
@@ -315,3 +315,34 @@ define <3 x i32> @load_v3i32(ptr %ptr){
%a = load <3 x i32>, ptr %ptr
ret <3 x i32> %a
}
+
+define <2 x i128> @load_v2i128(ptr %p) {
+; CHECK-SD-LABEL: load_v2i128:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldp x8, x1, [x0]
+; CHECK-SD-NEXT: ldp x2, x3, [x0, #16]
+; CHECK-SD-NEXT: mov x0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_v2i128:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldp q0, q1, [x0]
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: fmov x2, d1
+; CHECK-GI-NEXT: fmov x1, d2
+; CHECK-GI-NEXT: fmov x3, d3
+; CHECK-GI-NEXT: ret
+ %a = load <2 x i128>, ptr %p
+ ret <2 x i128> %a
+}
+
+define <2 x fp128> @load_v2f128(ptr %p) {
+; CHECK-LABEL: load_v2f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %a = load <2 x fp128>, ptr %p
+ ret <2 x fp128> %a
+}
diff --git a/llvm/test/CodeGen/AArch64/store.ll b/llvm/test/CodeGen/AArch64/store.ll
index bf22d79a4df994..86d74b69f4958f 100644
--- a/llvm/test/CodeGen/AArch64/store.ll
+++ b/llvm/test/CodeGen/AArch64/store.ll
@@ -340,3 +340,31 @@ define void @store_v3i32(<3 x i32> %a, ptr %ptr){
store <3 x i32> %a, ptr %ptr
ret void
}
+
+define void @store_v2i128(<2 x i128> %a, ptr %p) {
+; CHECK-SD-LABEL: store_v2i128:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x2, x3, [x4, #16]
+; CHECK-SD-NEXT: stp x0, x1, [x4]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: store_v2i128:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov v0.d[0], x0
+; CHECK-GI-NEXT: mov v1.d[0], x2
+; CHECK-GI-NEXT: mov v0.d[1], x1
+; CHECK-GI-NEXT: mov v1.d[1], x3
+; CHECK-GI-NEXT: stp q0, q1, [x4]
+; CHECK-GI-NEXT: ret
+ store <2 x i128> %a, ptr %p
+ ret void
+}
+
+define void @store_v2f128(<2 x fp128> %a, ptr %p) {
+; CHECK-LABEL: store_v2f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+ store <2 x fp128> %a, ptr %p
+ ret void
+}
More information about the llvm-commits mailing list