[llvm] [AArch64][GlobalISel] Improve lowering of vector fp16 fpext (PR #165554)
Ryan Cowan via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 10 04:42:20 PST 2025
https://github.com/HolyMolyCowMan updated https://github.com/llvm/llvm-project/pull/165554
From 3c6eff9ea031c80305b33dfc5d4714c20378658e Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Wed, 29 Oct 2025 12:56:07 +0000
Subject: [PATCH 1/6] [AArch64][GlobalISel] Improve lowering of vector fp16 fpext
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 1 +
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 9 +
.../GlobalISel/legalizer-info-validation.mir | 4 +-
llvm/test/CodeGen/AArch64/fmla.ll | 48 ++--
.../CodeGen/AArch64/fp16-v4-instructions.ll | 26 +-
.../CodeGen/AArch64/fp16-v8-instructions.ll | 50 +---
llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 186 +++++-------
llvm/test/CodeGen/AArch64/fpext.ll | 30 +-
llvm/test/CodeGen/AArch64/fptoi.ll | 264 ++++++------------
.../test/CodeGen/AArch64/fptosi-sat-vector.ll | 85 ++----
.../test/CodeGen/AArch64/fptoui-sat-vector.ll | 85 ++----
11 files changed, 260 insertions(+), 528 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 52c43a4ac4a04..b4ab9a5e4092c 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4852,6 +4852,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerMemcpyInline(MI);
case G_ZEXT:
case G_SEXT:
+ case G_FPEXT:
case G_ANYEXT:
return lowerEXT(MI);
case G_TRUNC:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 5f93847bc680e..3542be4105f97 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -825,6 +825,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor(
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
.libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
+ .moreElementsToNextPow2(0)
+ .lowerIf([](const LegalityQuery &Q) {
+ LLT DstTy = Q.Types[0];
+ LLT SrcTy = Q.Types[1];
+ return SrcTy.isVector() && DstTy.isVector() &&
+ SrcTy.getNumElements() > 2 &&
+ SrcTy.getScalarSizeInBits() == 16 &&
+ DstTy.getScalarSizeInBits() == 64;
+ })
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.scalarize(0);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 896603d6eb20d..92b273c6141d1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -555,8 +555,8 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. the first uncovered type index: 2, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
diff --git a/llvm/test/CodeGen/AArch64/fmla.ll b/llvm/test/CodeGen/AArch64/fmla.ll
index a37aabb0b5384..12b6562b5cf0c 100644
--- a/llvm/test/CodeGen/AArch64/fmla.ll
+++ b/llvm/test/CodeGen/AArch64/fmla.ll
@@ -865,22 +865,22 @@ define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
-; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
-; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
+; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
+; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
-; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
+; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
@@ -1350,22 +1350,22 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
-; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
-; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
+; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
+; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
-; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
+; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 6233ce743b706..1e1e25c04b384 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -241,30 +241,16 @@ define <4 x double> @h_to_d(<4 x half> %a) {
;
; CHECK-CVT-GI-LABEL: h_to_d:
; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
-; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
-; CHECK-CVT-GI-NEXT: fcvt d0, h0
-; CHECK-CVT-GI-NEXT: fcvt d4, h1
-; CHECK-CVT-GI-NEXT: fcvt d1, h2
-; CHECK-CVT-GI-NEXT: fcvt d2, h3
-; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0]
-; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0]
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: h_to_d:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d4, h1
-; CHECK-FP16-GI-NEXT: fcvt d1, h2
-; CHECK-FP16-GI-NEXT: fcvt d2, h3
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0]
-; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0]
+; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-FP16-GI-NEXT: ret
%1 = fpext <4 x half> %a to <4 x double>
ret <4 x double> %1
diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 86763eb5f9e3b..7b152bcccf1e5 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -298,48 +298,22 @@ define <8 x double> @h_to_d(<8 x half> %a) {
;
; CHECK-CVT-GI-LABEL: h_to_d:
; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
-; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
-; CHECK-CVT-GI-NEXT: mov h4, v0.h[4]
-; CHECK-CVT-GI-NEXT: mov h5, v0.h[5]
-; CHECK-CVT-GI-NEXT: mov h6, v0.h[6]
-; CHECK-CVT-GI-NEXT: mov h7, v0.h[7]
-; CHECK-CVT-GI-NEXT: fcvt d0, h0
-; CHECK-CVT-GI-NEXT: fcvt d16, h1
-; CHECK-CVT-GI-NEXT: fcvt d1, h2
-; CHECK-CVT-GI-NEXT: fcvt d17, h3
-; CHECK-CVT-GI-NEXT: fcvt d2, h4
-; CHECK-CVT-GI-NEXT: fcvt d4, h5
-; CHECK-CVT-GI-NEXT: fcvt d3, h6
-; CHECK-CVT-GI-NEXT: fcvt d5, h7
-; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0]
-; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0]
-; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0]
-; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0]
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v3.2s
+; CHECK-CVT-GI-NEXT: fcvtl2 v3.2d, v3.4s
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: h_to_d:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov h7, v0.h[7]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d16, h1
-; CHECK-FP16-GI-NEXT: fcvt d1, h2
-; CHECK-FP16-GI-NEXT: fcvt d17, h3
-; CHECK-FP16-GI-NEXT: fcvt d2, h4
-; CHECK-FP16-GI-NEXT: fcvt d4, h5
-; CHECK-FP16-GI-NEXT: fcvt d3, h6
-; CHECK-FP16-GI-NEXT: fcvt d5, h7
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0]
-; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0]
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0]
-; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0]
+; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v3.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v3.2d, v3.4s
; CHECK-FP16-GI-NEXT: ret
%1 = fpext <8 x half> %a to <8 x double>
ret <8 x double> %1
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index 637c02875b84e..b075a8b6f70ee 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -285,31 +285,24 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
;
; CHECK-FP16-GI-LABEL: stest_f16i32:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI6_1
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d1, h1
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d
; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_1]
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI6_0
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v1.2d
-; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b
+; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d
+; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d
+; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_0]
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, v2.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
@@ -351,24 +344,17 @@ define <4 x i32> @utest_f16i32(<4 x half> %x) {
;
; CHECK-FP16-GI-LABEL: utest_f16i32:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: fcvt d4, h4
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0]
+; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d
; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d
-; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d
+; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d
+; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b
+; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b
+; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
@@ -412,28 +398,21 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
;
; CHECK-FP16-GI-LABEL: ustest_f16i32:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: fcvt d4, h4
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0]
+; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d
; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: cmgt v2.2d, v0.2d, #0
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, #0
-; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-FP16-GI-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d
+; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b
+; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b
+; CHECK-FP16-GI-NEXT: cmgt v1.2d, v2.2d, #0
+; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, #0
+; CHECK-FP16-GI-NEXT: and v1.16b, v2.16b, v1.16b
+; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
@@ -2273,31 +2252,24 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
;
; CHECK-FP16-GI-LABEL: stest_f16i32_mm:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI33_1
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d1, h1
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d
; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_1]
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI33_0
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v1.2d
-; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b
+; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d
+; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d
+; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, v2.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
@@ -2337,24 +2309,17 @@ define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
;
; CHECK-FP16-GI-LABEL: utest_f16i32_mm:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: fcvt d4, h4
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0]
+; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d
; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d
-; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d
+; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d
+; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b
+; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b
+; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
@@ -2397,28 +2362,21 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
;
; CHECK-FP16-GI-LABEL: ustest_f16i32_mm:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: fcvt d4, h4
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0]
+; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d
; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: cmgt v2.2d, v0.2d, #0
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, #0
-; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-FP16-GI-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d
+; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b
+; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b
+; CHECK-FP16-GI-NEXT: cmgt v1.2d, v2.2d, #0
+; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, #0
+; CHECK-FP16-GI-NEXT: and v1.16b, v2.16b, v1.16b
+; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll
index df90f9d5f0910..295cb007467bd 100644
--- a/llvm/test/CodeGen/AArch64/fpext.ll
+++ b/llvm/test/CodeGen/AArch64/fpext.ll
@@ -82,11 +82,12 @@ define <3 x double> @fpext_v3f32_v3f64(<3 x float> %a) {
;
; CHECK-GI-LABEL: fpext_v3f32_v3f64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[2]
+; CHECK-GI-NEXT: mov v1.s[0], v0.s[2]
; CHECK-GI-NEXT: fcvtl v0.2d, v0.2s
-; CHECK-GI-NEXT: fcvt d2, s1
+; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-GI-NEXT: ret
entry:
%c = fpext <3 x float> %a to <3 x double>
@@ -353,12 +354,12 @@ define <3 x double> @fpext_v3f16_v3f64(<3 x half> %a) {
;
; CHECK-GI-LABEL: fpext_v3f16_v3f64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fcvt d0, h0
-; CHECK-GI-NEXT: fcvt d1, h1
-; CHECK-GI-NEXT: fcvt d2, h2
+; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl v0.2d, v1.2s
+; CHECK-GI-NEXT: fcvtl2 v2.2d, v1.4s
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%c = fpext <3 x half> %a to <3 x double>
@@ -375,16 +376,9 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) {
;
; CHECK-GI-LABEL: fpext_v4f16_v4f64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fcvt d0, h0
-; CHECK-GI-NEXT: fcvt d4, h1
-; CHECK-GI-NEXT: fcvt d1, h2
-; CHECK-GI-NEXT: fcvt d2, h3
-; CHECK-GI-NEXT: mov v0.d[1], v4.d[0]
-; CHECK-GI-NEXT: mov v1.d[1], v2.d[0]
+; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl v0.2d, v1.2s
+; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-GI-NEXT: ret
entry:
%c = fpext <4 x half> %a to <4 x double>
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index f6053cee50dae..7f747ec9b7cbb 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -4710,20 +4710,14 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) {
;
; CHECK-FP16-GI-LABEL: fptos_v3f16_v3i64:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-GI-NEXT: fcvt d1, h0
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-FP16-GI-NEXT: fcvt d2, h3
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v1.d[0]
-; CHECK-FP16-GI-NEXT: mov d1, v0.d[1]
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v1.2d
; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-FP16-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-FP16-GI-NEXT: mov d1, v0.d[1]
+; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-FP16-GI-NEXT: ret
entry:
%c = fptosi <3 x half> %a to <3 x i64>
@@ -4774,20 +4768,14 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) {
;
; CHECK-FP16-GI-LABEL: fptou_v3f16_v3i64:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-GI-NEXT: fcvt d1, h0
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-FP16-GI-NEXT: fcvt d2, h3
-; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v1.d[0]
-; CHECK-FP16-GI-NEXT: mov d1, v0.d[1]
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v1.2d
; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-FP16-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-FP16-GI-NEXT: mov d1, v0.d[1]
+; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-FP16-GI-NEXT: ret
entry:
%c = fptoui <3 x half> %a to <3 x i64>
@@ -4842,17 +4830,10 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) {
;
; CHECK-FP16-GI-LABEL: fptos_v4f16_v4i64:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d1, h1
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v1.2d
; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d
; CHECK-FP16-GI-NEXT: ret
entry:
@@ -4908,17 +4889,10 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) {
;
; CHECK-FP16-GI-LABEL: fptou_v4f16_v4i64:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d1, h1
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v1.2d
; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d
; CHECK-FP16-GI-NEXT: ret
entry:
@@ -5005,29 +4979,16 @@ define <8 x i64> @fptos_v8f16_v8i64(<8 x half> %a) {
;
; CHECK-FP16-GI-LABEL: fptos_v8f16_v8i64:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov h7, v0.h[7]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d1, h1
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: fcvt d4, h4
-; CHECK-FP16-GI-NEXT: fcvt d5, h5
-; CHECK-FP16-GI-NEXT: fcvt d6, h6
-; CHECK-FP16-GI-NEXT: fcvt d7, h7
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0]
-; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0]
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v4.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v6.2d
+; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v1.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtl v3.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v4.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v2.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v4.2d
; CHECK-FP16-GI-NEXT: ret
entry:
%c = fptosi <8 x half> %a to <8 x i64>
@@ -5113,29 +5074,16 @@ define <8 x i64> @fptou_v8f16_v8i64(<8 x half> %a) {
;
; CHECK-FP16-GI-LABEL: fptou_v8f16_v8i64:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov h7, v0.h[7]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d1, h1
-; CHECK-FP16-GI-NEXT: fcvt d2, h2
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: fcvt d4, h4
-; CHECK-FP16-GI-NEXT: fcvt d5, h5
-; CHECK-FP16-GI-NEXT: fcvt d6, h6
-; CHECK-FP16-GI-NEXT: fcvt d7, h7
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0]
-; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0]
-; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v4.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v6.2d
+; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v1.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtl v3.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v4.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v2.2d
+; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v1.2d
+; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d
+; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v4.2d
; CHECK-FP16-GI-NEXT: ret
entry:
%c = fptoui <8 x half> %a to <8 x i64>
@@ -5285,52 +5233,26 @@ define <16 x i64> @fptos_v16f16_v16i64(<16 x half> %a) {
;
; CHECK-FP16-GI-LABEL: fptos_v16f16_v16i64:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d2, h0
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h7, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h16, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov h0, v0.h[7]
-; CHECK-FP16-GI-NEXT: mov h17, v1.h[1]
-; CHECK-FP16-GI-NEXT: mov h18, v1.h[2]
-; CHECK-FP16-GI-NEXT: mov h19, v1.h[3]
-; CHECK-FP16-GI-NEXT: mov h20, v1.h[4]
-; CHECK-FP16-GI-NEXT: mov h21, v1.h[5]
-; CHECK-FP16-GI-NEXT: mov h22, v1.h[6]
-; CHECK-FP16-GI-NEXT: mov h23, v1.h[7]
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: fcvt d4, h4
-; CHECK-FP16-GI-NEXT: fcvt d5, h5
-; CHECK-FP16-GI-NEXT: fcvt d6, h6
-; CHECK-FP16-GI-NEXT: fcvt d7, h7
-; CHECK-FP16-GI-NEXT: fcvt d16, h16
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d24, h1
-; CHECK-FP16-GI-NEXT: fcvt d1, h17
-; CHECK-FP16-GI-NEXT: fcvt d17, h18
-; CHECK-FP16-GI-NEXT: fcvt d18, h19
-; CHECK-FP16-GI-NEXT: fcvt d19, h20
-; CHECK-FP16-GI-NEXT: fcvt d20, h21
-; CHECK-FP16-GI-NEXT: fcvt d21, h22
-; CHECK-FP16-GI-NEXT: fcvt d22, h23
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0]
-; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0]
-; CHECK-FP16-GI-NEXT: mov v16.d[1], v0.d[0]
-; CHECK-FP16-GI-NEXT: mov v24.d[1], v1.d[0]
-; CHECK-FP16-GI-NEXT: mov v17.d[1], v18.d[0]
-; CHECK-FP16-GI-NEXT: mov v19.d[1], v20.d[0]
-; CHECK-FP16-GI-NEXT: mov v21.d[1], v22.d[0]
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v2.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v4.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v6.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v16.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v4.2d, v24.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v5.2d, v17.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v6.2d, v19.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v7.2d, v21.2d
+; CHECK-FP16-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-FP16-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-FP16-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-FP16-GI-NEXT: fcvtl v4.2d, v2.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v2.4s
+; CHECK-FP16-GI-NEXT: fcvtl v5.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v6.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtl v7.2d, v3.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v16.2d, v3.4s
+; CHECK-FP16-GI-NEXT: fcvtl v17.2d, v1.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v18.2d, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v4.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v5.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v6.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v4.2d, v7.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v5.2d, v16.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v6.2d, v17.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v7.2d, v18.2d
; CHECK-FP16-GI-NEXT: ret
entry:
%c = fptosi <16 x half> %a to <16 x i64>
@@ -5480,52 +5402,26 @@ define <16 x i64> @fptou_v16f16_v16i64(<16 x half> %a) {
;
; CHECK-FP16-GI-LABEL: fptou_v16f16_v16i64:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[3]
-; CHECK-FP16-GI-NEXT: fcvt d2, h0
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h7, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h16, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov h0, v0.h[7]
-; CHECK-FP16-GI-NEXT: mov h17, v1.h[1]
-; CHECK-FP16-GI-NEXT: mov h18, v1.h[2]
-; CHECK-FP16-GI-NEXT: mov h19, v1.h[3]
-; CHECK-FP16-GI-NEXT: mov h20, v1.h[4]
-; CHECK-FP16-GI-NEXT: mov h21, v1.h[5]
-; CHECK-FP16-GI-NEXT: mov h22, v1.h[6]
-; CHECK-FP16-GI-NEXT: mov h23, v1.h[7]
-; CHECK-FP16-GI-NEXT: fcvt d3, h3
-; CHECK-FP16-GI-NEXT: fcvt d4, h4
-; CHECK-FP16-GI-NEXT: fcvt d5, h5
-; CHECK-FP16-GI-NEXT: fcvt d6, h6
-; CHECK-FP16-GI-NEXT: fcvt d7, h7
-; CHECK-FP16-GI-NEXT: fcvt d16, h16
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d24, h1
-; CHECK-FP16-GI-NEXT: fcvt d1, h17
-; CHECK-FP16-GI-NEXT: fcvt d17, h18
-; CHECK-FP16-GI-NEXT: fcvt d18, h19
-; CHECK-FP16-GI-NEXT: fcvt d19, h20
-; CHECK-FP16-GI-NEXT: fcvt d20, h21
-; CHECK-FP16-GI-NEXT: fcvt d21, h22
-; CHECK-FP16-GI-NEXT: fcvt d22, h23
-; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0]
-; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0]
-; CHECK-FP16-GI-NEXT: mov v16.d[1], v0.d[0]
-; CHECK-FP16-GI-NEXT: mov v24.d[1], v1.d[0]
-; CHECK-FP16-GI-NEXT: mov v17.d[1], v18.d[0]
-; CHECK-FP16-GI-NEXT: mov v19.d[1], v20.d[0]
-; CHECK-FP16-GI-NEXT: mov v21.d[1], v22.d[0]
-; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v2.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v4.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v6.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v16.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v4.2d, v24.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v5.2d, v17.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v6.2d, v19.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v7.2d, v21.2d
+; CHECK-FP16-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-FP16-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-FP16-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-FP16-GI-NEXT: fcvtl v4.2d, v2.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v2.4s
+; CHECK-FP16-GI-NEXT: fcvtl v5.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v6.2d, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtl v7.2d, v3.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v16.2d, v3.4s
+; CHECK-FP16-GI-NEXT: fcvtl v17.2d, v1.2s
+; CHECK-FP16-GI-NEXT: fcvtl2 v18.2d, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v4.2d
+; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d
+; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v5.2d
+; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v6.2d
+; CHECK-FP16-GI-NEXT: fcvtzu v4.2d, v7.2d
+; CHECK-FP16-GI-NEXT: fcvtzu v5.2d, v16.2d
+; CHECK-FP16-GI-NEXT: fcvtzu v6.2d, v17.2d
+; CHECK-FP16-GI-NEXT: fcvtzu v7.2d, v18.2d
; CHECK-FP16-GI-NEXT: ret
entry:
%c = fptoui <16 x half> %a to <16 x i64>
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index b963acd8cb2a1..dbcfaff8aee05 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -3088,30 +3088,14 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) {
; CHECK-SD-FP16-NEXT: mov v1.d[1], x11
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i64:
-; CHECK-GI-CVT: // %bb.0:
-; CHECK-GI-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-CVT-NEXT: fcvtl v1.2d, v0.2s
-; CHECK-GI-CVT-NEXT: fcvtl2 v2.2d, v0.4s
-; CHECK-GI-CVT-NEXT: fcvtzs v0.2d, v1.2d
-; CHECK-GI-CVT-NEXT: fcvtzs v1.2d, v2.2d
-; CHECK-GI-CVT-NEXT: ret
-;
-; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i64:
-; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: fcvt d0, h0
-; CHECK-GI-FP16-NEXT: fcvt d1, h1
-; CHECK-GI-FP16-NEXT: fcvt d2, h2
-; CHECK-GI-FP16-NEXT: fcvt d3, h3
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d
-; CHECK-GI-FP16-NEXT: ret
+; CHECK-GI-LABEL: test_signed_v4f16_v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-GI-NEXT: fcvtl2 v2.2d, v0.4s
+; CHECK-GI-NEXT: fcvtzs v0.2d, v1.2d
+; CHECK-GI-NEXT: fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT: ret
%x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f)
ret <4 x i64> %x
}
@@ -3797,46 +3781,19 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
; CHECK-SD-FP16-NEXT: mov v3.d[1], x14
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i64:
-; CHECK-GI-CVT: // %bb.0:
-; CHECK-GI-CVT-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-CVT-NEXT: fcvtl v2.2d, v1.2s
-; CHECK-GI-CVT-NEXT: fcvtl2 v1.2d, v1.4s
-; CHECK-GI-CVT-NEXT: fcvtl v3.2d, v0.2s
-; CHECK-GI-CVT-NEXT: fcvtl2 v4.2d, v0.4s
-; CHECK-GI-CVT-NEXT: fcvtzs v0.2d, v2.2d
-; CHECK-GI-CVT-NEXT: fcvtzs v1.2d, v1.2d
-; CHECK-GI-CVT-NEXT: fcvtzs v2.2d, v3.2d
-; CHECK-GI-CVT-NEXT: fcvtzs v3.2d, v4.2d
-; CHECK-GI-CVT-NEXT: ret
-;
-; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i64:
-; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov h7, v0.h[7]
-; CHECK-GI-FP16-NEXT: fcvt d0, h0
-; CHECK-GI-FP16-NEXT: fcvt d1, h1
-; CHECK-GI-FP16-NEXT: fcvt d2, h2
-; CHECK-GI-FP16-NEXT: fcvt d3, h3
-; CHECK-GI-FP16-NEXT: fcvt d4, h4
-; CHECK-GI-FP16-NEXT: fcvt d5, h5
-; CHECK-GI-FP16-NEXT: fcvt d6, h6
-; CHECK-GI-FP16-NEXT: fcvt d7, h7
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0]
-; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0]
-; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d
-; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d
-; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d
-; CHECK-GI-FP16-NEXT: ret
+; CHECK-GI-LABEL: test_signed_v8f16_v8i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s
+; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s
+; CHECK-GI-NEXT: fcvtl v3.2d, v0.2s
+; CHECK-GI-NEXT: fcvtl2 v4.2d, v0.4s
+; CHECK-GI-NEXT: fcvtzs v0.2d, v2.2d
+; CHECK-GI-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT: fcvtzs v2.2d, v3.2d
+; CHECK-GI-NEXT: fcvtzs v3.2d, v4.2d
+; CHECK-GI-NEXT: ret
%x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f)
ret <8 x i64> %x
}
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index 5a66b68af8e96..44e6e9415263b 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -2506,30 +2506,14 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) {
; CHECK-SD-FP16-NEXT: mov v1.d[1], x11
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i64:
-; CHECK-GI-CVT: // %bb.0:
-; CHECK-GI-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-CVT-NEXT: fcvtl v1.2d, v0.2s
-; CHECK-GI-CVT-NEXT: fcvtl2 v2.2d, v0.4s
-; CHECK-GI-CVT-NEXT: fcvtzu v0.2d, v1.2d
-; CHECK-GI-CVT-NEXT: fcvtzu v1.2d, v2.2d
-; CHECK-GI-CVT-NEXT: ret
-;
-; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i64:
-; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: fcvt d0, h0
-; CHECK-GI-FP16-NEXT: fcvt d1, h1
-; CHECK-GI-FP16-NEXT: fcvt d2, h2
-; CHECK-GI-FP16-NEXT: fcvt d3, h3
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d
-; CHECK-GI-FP16-NEXT: ret
+; CHECK-GI-LABEL: test_unsigned_v4f16_v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-GI-NEXT: fcvtl2 v2.2d, v0.4s
+; CHECK-GI-NEXT: fcvtzu v0.2d, v1.2d
+; CHECK-GI-NEXT: fcvtzu v1.2d, v2.2d
+; CHECK-GI-NEXT: ret
%x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f)
ret <4 x i64> %x
}
@@ -3114,46 +3098,19 @@ define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) {
; CHECK-SD-FP16-NEXT: mov v3.d[1], x14
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i64:
-; CHECK-GI-CVT: // %bb.0:
-; CHECK-GI-CVT-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-CVT-NEXT: fcvtl v2.2d, v1.2s
-; CHECK-GI-CVT-NEXT: fcvtl2 v1.2d, v1.4s
-; CHECK-GI-CVT-NEXT: fcvtl v3.2d, v0.2s
-; CHECK-GI-CVT-NEXT: fcvtl2 v4.2d, v0.4s
-; CHECK-GI-CVT-NEXT: fcvtzu v0.2d, v2.2d
-; CHECK-GI-CVT-NEXT: fcvtzu v1.2d, v1.2d
-; CHECK-GI-CVT-NEXT: fcvtzu v2.2d, v3.2d
-; CHECK-GI-CVT-NEXT: fcvtzu v3.2d, v4.2d
-; CHECK-GI-CVT-NEXT: ret
-;
-; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i64:
-; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov h7, v0.h[7]
-; CHECK-GI-FP16-NEXT: fcvt d0, h0
-; CHECK-GI-FP16-NEXT: fcvt d1, h1
-; CHECK-GI-FP16-NEXT: fcvt d2, h2
-; CHECK-GI-FP16-NEXT: fcvt d3, h3
-; CHECK-GI-FP16-NEXT: fcvt d4, h4
-; CHECK-GI-FP16-NEXT: fcvt d5, h5
-; CHECK-GI-FP16-NEXT: fcvt d6, h6
-; CHECK-GI-FP16-NEXT: fcvt d7, h7
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0]
-; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0]
-; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0]
-; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d
-; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d
-; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d
-; CHECK-GI-FP16-NEXT: ret
+; CHECK-GI-LABEL: test_unsigned_v8f16_v8i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s
+; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s
+; CHECK-GI-NEXT: fcvtl v3.2d, v0.2s
+; CHECK-GI-NEXT: fcvtl2 v4.2d, v0.4s
+; CHECK-GI-NEXT: fcvtzu v0.2d, v2.2d
+; CHECK-GI-NEXT: fcvtzu v1.2d, v1.2d
+; CHECK-GI-NEXT: fcvtzu v2.2d, v3.2d
+; CHECK-GI-NEXT: fcvtzu v3.2d, v4.2d
+; CHECK-GI-NEXT: ret
%x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f)
ret <8 x i64> %x
}
>From 86bc2d904d9753aa2f431e01a5cf709f12ca9a7b Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Fri, 7 Nov 2025 15:17:08 +0000
Subject: [PATCH 2/6] Use legalizer mutations instead of lowering
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 10 ++++++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 5 ++---
llvm/test/CodeGen/AArch64/fpext.ll | 19 +++++--------------
llvm/test/CodeGen/AArch64/fptoi.ll | 14 ++++----------
4 files changed, 21 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b4ab9a5e4092c..08b2c1aed6c67 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3065,6 +3065,16 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_FPEXT:
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
+ Observer.changedInstr(MI);
+ return Legalized;
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_INTRINSIC_LRINT:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 3542be4105f97..1130a165a2d59 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -826,14 +826,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
.libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
.moreElementsToNextPow2(0)
- .lowerIf([](const LegalityQuery &Q) {
+ .widenScalarIf([](const LegalityQuery &Q) {
LLT DstTy = Q.Types[0];
LLT SrcTy = Q.Types[1];
return SrcTy.isVector() && DstTy.isVector() &&
- SrcTy.getNumElements() > 2 &&
SrcTy.getScalarSizeInBits() == 16 &&
DstTy.getScalarSizeInBits() == 64;
- })
+ }, changeElementTo(1, s32))
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.scalarize(0);
diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll
index 295cb007467bd..8980340a447de 100644
--- a/llvm/test/CodeGen/AArch64/fpext.ll
+++ b/llvm/test/CodeGen/AArch64/fpext.ll
@@ -321,20 +321,11 @@ entry:
}
define <2 x double> @fpext_v2f16_v2f64(<2 x half> %a) {
-; CHECK-SD-LABEL: fpext_v2f16_v2f64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-SD-NEXT: fcvtl v0.2d, v0.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fpext_v2f16_v2f64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: fcvt d0, h0
-; CHECK-GI-NEXT: fcvt d1, h1
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fpext_v2f16_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: ret
entry:
%c = fpext <2 x half> %a to <2 x double>
ret <2 x double> %c
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 7f747ec9b7cbb..3dafabe0b69d7 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -4610,11 +4610,8 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
;
; CHECK-FP16-GI-LABEL: fptos_v2f16_v2i64:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d1, h1
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v0.2s
; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: ret
entry:
@@ -4654,11 +4651,8 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
;
; CHECK-FP16-GI-LABEL: fptou_v2f16_v2i64:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: fcvt d0, h0
-; CHECK-FP16-GI-NEXT: fcvt d1, h1
-; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v0.2s
; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: ret
entry:
>From af44433857f080fc16d9a66961ee574a5a26f84d Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 10 Nov 2025 09:45:18 +0000
Subject: [PATCH 3/6] Remove lower for fpext
---
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 08b2c1aed6c67..bc06441394d20 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4862,7 +4862,6 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerMemcpyInline(MI);
case G_ZEXT:
case G_SEXT:
- case G_FPEXT:
case G_ANYEXT:
return lowerEXT(MI);
case G_TRUNC:
>From f604fbe0cd629303780c43ecf3adf627657f40c6 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 10 Nov 2025 10:37:14 +0000
Subject: [PATCH 4/6] Add MIR test
---
llvm/test/CodeGen/AArch64/legalize-fpext.mir | 134 +++++++++++++++++++
1 file changed, 134 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/legalize-fpext.mir
diff --git a/llvm/test/CodeGen/AArch64/legalize-fpext.mir b/llvm/test/CodeGen/AArch64/legalize-fpext.mir
new file mode 100644
index 0000000000000..2eee8141a72fc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/legalize-fpext.mir
@@ -0,0 +1,134 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -global-isel=0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+# RUN: llc -mtriple=aarch64 -global-isel=1 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+---
+name: fpext_f16_f64
+body: |
+ bb.0:
+ liveins: $h0
+
+ ; CHECK-LABEL: name: fpext_f16_f64
+ ; CHECK: liveins: $h0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s16)
+ ; CHECK-NEXT: $d0 = COPY [[FPEXT]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(s16) = COPY $h0
+ %1:_(s64) = G_FPEXT %0(s16)
+ $d0 = COPY %1(s64)
+ RET_ReallyLR implicit $d0
+...
+
+---
+name: fpext_v2f16_v2f64
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fpext_v2f16_v2f64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
+ ; CHECK-NEXT: $q0 = COPY [[FPEXT1]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(<4 x s16>) = COPY $d0
+ %0:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %1(<4 x s16>)
+ %3:_(<2 x s64>) = G_FPEXT %0(<2 x s16>)
+ $q0 = COPY %3(<2 x s64>)
+ RET_ReallyLR implicit $q0
+...
+
+---
+name: fpext_v3f16_v3f64
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fpext_v3f16_v3f64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
+ ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT1]](<2 x s64>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT2]](<2 x s64>)
+ ; CHECK-NEXT: $d0 = COPY [[UV2]](s64)
+ ; CHECK-NEXT: $d1 = COPY [[UV3]](s64)
+ ; CHECK-NEXT: $d2 = COPY [[UV4]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1, implicit $d2
+ %1:_(<4 x s16>) = COPY $d0
+ %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1(<4 x s16>)
+ %0:_(<3 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16)
+ %6:_(<3 x s64>) = G_FPEXT %0(<3 x s16>)
+ %7:_(s64), %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %6(<3 x s64>)
+ $d0 = COPY %7(s64)
+ $d1 = COPY %8(s64)
+ $d2 = COPY %9(s64)
+ RET_ReallyLR implicit $d0, implicit $d1, implicit $d2
+...
+
+---
+name: fpext_v4f16_v4f64
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fpext_v4f16_v4f64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
+ ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>)
+ ; CHECK-NEXT: $q0 = COPY [[FPEXT1]](<2 x s64>)
+ ; CHECK-NEXT: $q1 = COPY [[FPEXT2]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(<4 x s64>) = G_FPEXT %0(<4 x s16>)
+ %2:_(<2 x s64>), %3:_(<2 x s64>) = G_UNMERGE_VALUES %1(<4 x s64>)
+ $q0 = COPY %2(<2 x s64>)
+ $q1 = COPY %3(<2 x s64>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+
+---
+name: fpext_v8f16_v8f64
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: fpext_v8f16_v8f64
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT1]](<4 x s32>)
+ ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV2]](<2 x s32>)
+ ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV3]](<2 x s32>)
+ ; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV4]](<2 x s32>)
+ ; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV5]](<2 x s32>)
+ ; CHECK-NEXT: $q0 = COPY [[FPEXT2]](<2 x s64>)
+ ; CHECK-NEXT: $q1 = COPY [[FPEXT3]](<2 x s64>)
+ ; CHECK-NEXT: $q2 = COPY [[FPEXT4]](<2 x s64>)
+ ; CHECK-NEXT: $q3 = COPY [[FPEXT5]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1, implicit $q2, implicit $q3
+ %0:_(<8 x s16>) = COPY $q0
+ %1:_(<8 x s64>) = G_FPEXT %0(<8 x s16>)
+ %2:_(<2 x s64>), %3:_(<2 x s64>), %4:_(<2 x s64>), %5:_(<2 x s64>) = G_UNMERGE_VALUES %1(<8 x s64>)
+ $q0 = COPY %2(<2 x s64>)
+ $q1 = COPY %3(<2 x s64>)
+ $q2 = COPY %4(<2 x s64>)
+ $q3 = COPY %5(<2 x s64>)
+ RET_ReallyLR implicit $q0, implicit $q1, implicit $q2, implicit $q3
+...
>From ca386e187cee0ab538bbc2c1d82f7aadaa19cbd0 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 10 Nov 2025 10:42:14 +0000
Subject: [PATCH 5/6] Linting
---
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1130a165a2d59..1898b914d5f0e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -826,13 +826,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
.libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
.moreElementsToNextPow2(0)
- .widenScalarIf([](const LegalityQuery &Q) {
- LLT DstTy = Q.Types[0];
- LLT SrcTy = Q.Types[1];
- return SrcTy.isVector() && DstTy.isVector() &&
- SrcTy.getScalarSizeInBits() == 16 &&
- DstTy.getScalarSizeInBits() == 64;
- }, changeElementTo(1, s32))
+ .widenScalarIf(
+ [](const LegalityQuery &Q) {
+ LLT DstTy = Q.Types[0];
+ LLT SrcTy = Q.Types[1];
+ return SrcTy.isVector() && DstTy.isVector() &&
+ SrcTy.getScalarSizeInBits() == 16 &&
+ DstTy.getScalarSizeInBits() == 64;
+ },
+ changeElementTo(1, s32))
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.scalarize(0);
>From d828d2ae6336e05aa009709361423e1b48c66566 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 10 Nov 2025 11:33:49 +0000
Subject: [PATCH 6/6] Rework test & add to existing
---
.../AArch64/GlobalISel/legalize-fpext.mir | 130 ++++++++++++++++++
1 file changed, 130 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir
index 1c10e08d54c61..50394b6bbbf99 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir
@@ -32,3 +32,133 @@ body: |
RET_ReallyLR
...
+
+---
+name: fpext_f16_f64
+body: |
+ bb.0:
+ liveins: $h0
+ ; CHECK-LABEL: name: fpext_f16_f64
+ ; CHECK: liveins: $h0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s16)
+ ; CHECK-NEXT: $d0 = COPY [[FPEXT]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(s16) = COPY $h0
+ %1:_(s64) = G_FPEXT %0(s16)
+ $d0 = COPY %1(s64)
+ RET_ReallyLR implicit $d0
+...
+
+---
+name: fpext_v2f16_v2f64
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fpext_v2f16_v2f64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
+ ; CHECK-NEXT: $q0 = COPY [[FPEXT1]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(<4 x s16>) = COPY $d0
+ %0:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %1(<4 x s16>)
+ %3:_(<2 x s64>) = G_FPEXT %0(<2 x s16>)
+ $q0 = COPY %3(<2 x s64>)
+ RET_ReallyLR implicit $q0
+...
+
+---
+name: fpext_v3f16_v3f64
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fpext_v3f16_v3f64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
+ ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT1]](<2 x s64>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT2]](<2 x s64>)
+ ; CHECK-NEXT: $d0 = COPY [[UV2]](s64)
+ ; CHECK-NEXT: $d1 = COPY [[UV3]](s64)
+ ; CHECK-NEXT: $d2 = COPY [[UV4]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1, implicit $d2
+ %1:_(<4 x s16>) = COPY $d0
+ %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1(<4 x s16>)
+ %0:_(<3 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16)
+ %6:_(<3 x s64>) = G_FPEXT %0(<3 x s16>)
+ %7:_(s64), %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %6(<3 x s64>)
+ $d0 = COPY %7(s64)
+ $d1 = COPY %8(s64)
+ $d2 = COPY %9(s64)
+ RET_ReallyLR implicit $d0, implicit $d1, implicit $d2
+...
+
+---
+name: fpext_v4f16_v4f64
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fpext_v4f16_v4f64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
+ ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>)
+ ; CHECK-NEXT: $q0 = COPY [[FPEXT1]](<2 x s64>)
+ ; CHECK-NEXT: $q1 = COPY [[FPEXT2]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(<4 x s64>) = G_FPEXT %0(<4 x s16>)
+ %2:_(<2 x s64>), %3:_(<2 x s64>) = G_UNMERGE_VALUES %1(<4 x s64>)
+ $q0 = COPY %2(<2 x s64>)
+ $q1 = COPY %3(<2 x s64>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+
+---
+name: fpext_v8f16_v8f64
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: fpext_v8f16_v8f64
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT1]](<4 x s32>)
+ ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV2]](<2 x s32>)
+ ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV3]](<2 x s32>)
+ ; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV4]](<2 x s32>)
+ ; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV5]](<2 x s32>)
+ ; CHECK-NEXT: $q0 = COPY [[FPEXT2]](<2 x s64>)
+ ; CHECK-NEXT: $q1 = COPY [[FPEXT3]](<2 x s64>)
+ ; CHECK-NEXT: $q2 = COPY [[FPEXT4]](<2 x s64>)
+ ; CHECK-NEXT: $q3 = COPY [[FPEXT5]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1, implicit $q2, implicit $q3
+ %0:_(<8 x s16>) = COPY $q0
+ %1:_(<8 x s64>) = G_FPEXT %0(<8 x s16>)
+ %2:_(<2 x s64>), %3:_(<2 x s64>), %4:_(<2 x s64>), %5:_(<2 x s64>) = G_UNMERGE_VALUES %1(<8 x s64>)
+ $q0 = COPY %2(<2 x s64>)
+ $q1 = COPY %3(<2 x s64>)
+ $q2 = COPY %4(<2 x s64>)
+ $q3 = COPY %5(<2 x s64>)
+ RET_ReallyLR implicit $q0, implicit $q1, implicit $q2, implicit $q3
+...
More information about the llvm-commits mailing list