[llvm] [AArch64][GlobalISel] Fix lowering of i64->f32 itofp. (PR #132703)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 24 02:54:46 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
This is a GISel equivalent of #<!-- -->130665, preventing a double-rounding issue in sitofp/uitofp by scalarizing i64->f32 converts. Most of the changes are made in the ActionDefinitionsBuilder for G_SITOFP/G_UITOFP. Because an i64->f16 itofp can legally be converted via f64 and f32 without double-rounding, but a direct f64->f16 fpround cannot, that variant is lowered by explicitly building the two fptruncs (f64->f32, then f32->f16).
---
Patch is 71.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132703.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+22)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+14-2)
- (modified) llvm/test/CodeGen/AArch64/itofp.ll (+698-880)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a9f80860124fb..dc4873b88d248 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7609,6 +7609,22 @@ LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
return Legalized;
}
+/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
+/// convert fpround f64->f16 without double-rounding, so we manually perform the
+/// lowering here where we know it is valid.
+static LegalizerHelper::LegalizeResult
+loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
+ LLT SrcTy, MachineIRBuilder &MIRBuilder) {
+ auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
+ ? MIRBuilder.buildUITOFP(SrcTy, Src)
+ : MIRBuilder.buildSITOFP(SrcTy, Src);
+ LLT S32Ty = SrcTy.changeElementSize(32);
+ auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
+ MIRBuilder.buildFPTrunc(Dst, M2);
+ MI.eraseFromParent();
+ return LegalizerHelper::Legalized;
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
@@ -7620,6 +7636,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
return Legalized;
}
+ if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
+ return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
+
if (SrcTy != LLT::scalar(64))
return UnableToLegalize;
@@ -7651,6 +7670,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
return Legalized;
}
+ if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
+ return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
+
if (SrcTy != S64)
return UnableToLegalize;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index c36b20badfc09..4ccf3e260e991 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -917,16 +917,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.moreElementsToNextPow2(1)
.widenScalarOrEltToNextPow2OrMinSize(1)
.minScalar(1, s32)
+ .lowerIf([](const LegalityQuery &Query) {
+ return Query.Types[1].isVector() &&
+ Query.Types[1].getScalarSizeInBits() == 64 &&
+ Query.Types[0].getScalarSizeInBits() == 16;
+ })
.widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
+ .scalarizeIf(
+ // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
+ [](const LegalityQuery &Query) {
+ return Query.Types[0].getScalarSizeInBits() == 32 &&
+ Query.Types[1].getScalarSizeInBits() == 64;
+ },
+ 0)
.widenScalarIf(
- [=](const LegalityQuery &Query) {
+ [](const LegalityQuery &Query) {
return Query.Types[1].getScalarSizeInBits() <= 64 &&
Query.Types[0].getScalarSizeInBits() <
Query.Types[1].getScalarSizeInBits();
},
LegalizeMutations::changeElementSizeTo(0, 1))
.widenScalarIf(
- [=](const LegalityQuery &Query) {
+ [](const LegalityQuery &Query) {
return Query.Types[0].getScalarSizeInBits() <= 64 &&
Query.Types[0].getScalarSizeInBits() >
Query.Types[1].getScalarSizeInBits();
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 81c1a64f2d434..8bcf7c2a04ae3 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -4421,22 +4421,42 @@ entry:
}
define <2 x float> @stofp_v2i64_v2f32(<2 x i64> %a) {
-; CHECK-LABEL: stofp_v2i64_v2f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: stofp_v2i64_v2f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: stofp_v2i64_v2f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: fmov x9, d0
+; CHECK-GI-NEXT: scvtf s0, x9
+; CHECK-GI-NEXT: scvtf s1, x8
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%c = sitofp <2 x i64> %a to <2 x float>
ret <2 x float> %c
}
define <2 x float> @utofp_v2i64_v2f32(<2 x i64> %a) {
-; CHECK-LABEL: utofp_v2i64_v2f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: utofp_v2i64_v2f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: utofp_v2i64_v2f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: fmov x9, d0
+; CHECK-GI-NEXT: ucvtf s0, x9
+; CHECK-GI-NEXT: ucvtf s1, x8
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%c = uitofp <2 x i64> %a to <2 x float>
ret <2 x float> %c
@@ -4457,16 +4477,13 @@ define <3 x float> @stofp_v3i64_v3f32(<3 x i64> %a) {
;
; CHECK-GI-LABEL: stofp_v3i64_v3f32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: scvtf v2.2d, v2.2d
-; CHECK-GI-NEXT: scvtf v0.2d, v0.2d
-; CHECK-GI-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-GI-NEXT: fcvtn v1.2s, v0.2d
-; CHECK-GI-NEXT: mov v0.s[0], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[1]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: scvtf s0, x8
+; CHECK-GI-NEXT: scvtf s1, x9
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: scvtf s2, x8
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
; CHECK-GI-NEXT: ret
entry:
@@ -4489,16 +4506,13 @@ define <3 x float> @utofp_v3i64_v3f32(<3 x i64> %a) {
;
; CHECK-GI-LABEL: utofp_v3i64_v3f32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-GI-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-GI-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-GI-NEXT: fcvtn v1.2s, v0.2d
-; CHECK-GI-NEXT: mov v0.s[0], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[1]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: ucvtf s0, x8
+; CHECK-GI-NEXT: ucvtf s1, x9
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: ucvtf s2, x8
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
; CHECK-GI-NEXT: ret
entry:
@@ -4507,26 +4521,56 @@ entry:
}
define <4 x float> @stofp_v4i64_v4f32(<4 x i64> %a) {
-; CHECK-LABEL: stofp_v4i64_v4f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: scvtf v1.2d, v1.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: stofp_v4i64_v4f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: scvtf v1.2d, v1.2d
+; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: stofp_v4i64_v4f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: fmov x9, d0
+; CHECK-GI-NEXT: scvtf s0, x9
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: scvtf s2, x8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: scvtf s1, x8
+; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
+; CHECK-GI-NEXT: scvtf s2, x9
+; CHECK-GI-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v2.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = sitofp <4 x i64> %a to <4 x float>
ret <4 x float> %c
}
define <4 x float> @utofp_v4i64_v4f32(<4 x i64> %a) {
-; CHECK-LABEL: utofp_v4i64_v4f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: utofp_v4i64_v4f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: utofp_v4i64_v4f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: fmov x9, d0
+; CHECK-GI-NEXT: ucvtf s0, x9
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: ucvtf s2, x8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: ucvtf s1, x8
+; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
+; CHECK-GI-NEXT: ucvtf s2, x9
+; CHECK-GI-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v2.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = uitofp <4 x i64> %a to <4 x float>
ret <4 x float> %c
@@ -4547,14 +4591,29 @@ define <8 x float> @stofp_v8i64_v8f32(<8 x i64> %a) {
;
; CHECK-GI-LABEL: stofp_v8i64_v8f32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: scvtf v0.2d, v0.2d
-; CHECK-GI-NEXT: scvtf v2.2d, v2.2d
-; CHECK-GI-NEXT: scvtf v4.2d, v1.2d
-; CHECK-GI-NEXT: scvtf v3.2d, v3.2d
-; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-GI-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-GI-NEXT: fcvtn2 v0.4s, v4.2d
-; CHECK-GI-NEXT: fcvtn2 v1.4s, v3.2d
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: mov x9, v2.d[1]
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: scvtf s0, x10
+; CHECK-GI-NEXT: fmov x10, d2
+; CHECK-GI-NEXT: scvtf s5, x9
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: scvtf s4, x8
+; CHECK-GI-NEXT: mov x8, v1.d[1]
+; CHECK-GI-NEXT: scvtf s2, x10
+; CHECK-GI-NEXT: fmov x10, d3
+; CHECK-GI-NEXT: scvtf s1, x9
+; CHECK-GI-NEXT: mov x9, v3.d[1]
+; CHECK-GI-NEXT: mov v0.s[1], v4.s[0]
+; CHECK-GI-NEXT: scvtf s3, x10
+; CHECK-GI-NEXT: scvtf s4, x8
+; CHECK-GI-NEXT: mov v2.s[1], v5.s[0]
+; CHECK-GI-NEXT: scvtf s5, x9
+; CHECK-GI-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v4.s[0]
+; CHECK-GI-NEXT: mov v2.s[3], v5.s[0]
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
%c = sitofp <8 x i64> %a to <8 x float>
@@ -4576,14 +4635,29 @@ define <8 x float> @utofp_v8i64_v8f32(<8 x i64> %a) {
;
; CHECK-GI-LABEL: utofp_v8i64_v8f32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-GI-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-GI-NEXT: ucvtf v4.2d, v1.2d
-; CHECK-GI-NEXT: ucvtf v3.2d, v3.2d
-; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-GI-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-GI-NEXT: fcvtn2 v0.4s, v4.2d
-; CHECK-GI-NEXT: fcvtn2 v1.4s, v3.2d
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: mov x9, v2.d[1]
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: ucvtf s0, x10
+; CHECK-GI-NEXT: fmov x10, d2
+; CHECK-GI-NEXT: ucvtf s5, x9
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: ucvtf s4, x8
+; CHECK-GI-NEXT: mov x8, v1.d[1]
+; CHECK-GI-NEXT: ucvtf s2, x10
+; CHECK-GI-NEXT: fmov x10, d3
+; CHECK-GI-NEXT: ucvtf s1, x9
+; CHECK-GI-NEXT: mov x9, v3.d[1]
+; CHECK-GI-NEXT: mov v0.s[1], v4.s[0]
+; CHECK-GI-NEXT: ucvtf s3, x10
+; CHECK-GI-NEXT: ucvtf s4, x8
+; CHECK-GI-NEXT: mov v2.s[1], v5.s[0]
+; CHECK-GI-NEXT: ucvtf s5, x9
+; CHECK-GI-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v4.s[0]
+; CHECK-GI-NEXT: mov v2.s[3], v5.s[0]
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
%c = uitofp <8 x i64> %a to <8 x float>
@@ -4591,50 +4665,146 @@ entry:
}
define <16 x float> @stofp_v16i64_v16f32(<16 x i64> %a) {
-; CHECK-LABEL: stofp_v16i64_v16f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: scvtf v2.2d, v2.2d
-; CHECK-NEXT: scvtf v4.2d, v4.2d
-; CHECK-NEXT: scvtf v6.2d, v6.2d
-; CHECK-NEXT: scvtf v16.2d, v1.2d
-; CHECK-NEXT: scvtf v17.2d, v3.2d
-; CHECK-NEXT: scvtf v5.2d, v5.2d
-; CHECK-NEXT: scvtf v7.2d, v7.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-NEXT: fcvtn v2.2s, v4.2d
-; CHECK-NEXT: fcvtn v3.2s, v6.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v16.2d
-; CHECK-NEXT: fcvtn2 v1.4s, v17.2d
-; CHECK-NEXT: fcvtn2 v2.4s, v5.2d
-; CHECK-NEXT: fcvtn2 v3.4s, v7.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: stofp_v16i64_v16f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: scvtf v2.2d, v2.2d
+; CHECK-SD-NEXT: scvtf v4.2d, v4.2d
+; CHECK-SD-NEXT: scvtf v6.2d, v6.2d
+; CHECK-SD-NEXT: scvtf v16.2d, v1.2d
+; CHECK-SD-NEXT: scvtf v17.2d, v3.2d
+; CHECK-SD-NEXT: scvtf v5.2d, v5.2d
+; CHECK-SD-NEXT: scvtf v7.2d, v7.2d
+; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT: fcvtn v1.2s, v2.2d
+; CHECK-SD-NEXT: fcvtn v2.2s, v4.2d
+; CHECK-SD-NEXT: fcvtn v3.2s, v6.2d
+; CHECK-SD-NEXT: fcvtn2 v0.4s, v16.2d
+; CHECK-SD-NEXT: fcvtn2 v1.4s, v17.2d
+; CHECK-SD-NEXT: fcvtn2 v2.4s, v5.2d
+; CHECK-SD-NEXT: fcvtn2 v3.4s, v7.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: stofp_v16i64_v16f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x9, v0.d[1]
+; CHECK-GI-NEXT: fmov x13, d2
+; CHECK-GI-NEXT: fmov x11, d0
+; CHECK-GI-NEXT: mov x12, v4.d[1]
+; CHECK-GI-NEXT: mov x8, v1.d[1]
+; CHECK-GI-NEXT: mov x10, v2.d[1]
+; CHECK-GI-NEXT: scvtf s0, x11
+; CHECK-GI-NEXT: mov x11, v6.d[1]
+; CHECK-GI-NEXT: scvtf s16, x9
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: scvtf s1, x13
+; CHECK-GI-NEXT: fmov x13, d4
+; CHECK-GI-NEXT: scvtf s4, x12
+; CHECK-GI-NEXT: fmov x12, d6
+; CHECK-GI-NEXT: scvtf s17, x10
+; CHECK-GI-NEXT: mov x10, v3.d[1]
+; CHECK-GI-NEXT: scvtf s6, x11
+; CHECK-GI-NEXT: fmov x11, d5
+; CHECK-GI-NEXT: scvtf s18, x9
+; CHECK-GI-NEXT: scvtf s2, x13
+; CHECK-GI-NEXT: fmov x13, d3
+; CHECK-GI-NEXT: scvtf s3, x12
+; CHECK-GI-NEXT: mov x9, v5.d[1]
+; CHECK-GI-NEXT: mov x12, v7.d[1]
+; CHECK-GI-NEXT: mov v0.s[1], v16.s[0]
+; CHECK-GI-NEXT: scvtf s5, x11
+; CHECK-GI-NEXT: mov v1.s[1], v17.s[0]
+; CHECK-GI-NEXT: scvtf s19, x13
+; CHECK-GI-NEXT: fmov x13, d7
+; CHECK-GI-NEXT: mov v2.s[1], v4.s[0]
+; CHECK-GI-NEXT: mov v3.s[1], v6.s[0]
+; CHECK-GI-NEXT: scvtf s4, x8
+; CHECK-GI-NEXT: scvtf s6, x10
+; CHECK-GI-NEXT: scvtf s16, x9
+; CHECK-GI-NEXT: scvtf s17, x12
+; CHECK-GI-NEXT: scvtf s7, x13
+; CHECK-GI-NEXT: mov v0.s[2], v18.s[0]
+; CHECK-GI-NEXT: mov v1.s[2], v19.s[0]
+; CHECK-GI-NEXT: mov v2.s[2], v5.s[0]
+; CHECK-GI-NEXT: mov v3.s[2], v7.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v4.s[0]
+; CHECK-GI-NEXT: mov v1.s[3], v6.s[0]
+; CHECK-GI-NEXT: mov v2.s[3], v16.s[0]
+; CHECK-GI-NEXT: mov v3.s[3], v17.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = sitofp <16 x i64> %a to <16 x float>
ret <16 x float> %c
}
define <16 x float> @utofp_v16i64_v16f32(<16 x i64> %a) {
-; CHECK-LABEL: utofp_v16i64_v16f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-NEXT: ucvtf v4.2d, v4.2d
-; CHECK-NEXT: ucvtf v6.2d, v6.2d
-; CHECK-NEXT: ucvtf v16.2d, v1.2d
-; CHECK-NEXT: ucvtf v17.2d, v3.2d
-; CHECK-NEXT: ucvtf v5.2d, v5.2d
-; CHECK-NEXT: ucvtf v7.2d, v7.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn v1.2s, v2.2d
-; CHECK-NEXT: fcvtn v2.2s, v4.2d
-; CHECK-NEXT: fcvtn v3.2s, v6.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v16.2d
-; CHECK-NEXT: fcvtn2 v1.4s, v17.2d
-; CHECK-NEXT: fcvtn2 v2.4s, v5.2d
-; CHECK-NEXT: fcvtn2 v3.4s, v7.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: utofp_v16i64_v16f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-SD-NEXT: ucvtf v4.2d, v4.2d
+; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d
+; CHECK-SD-NEXT: ucvtf v16.2d, v1.2d
+; CHECK-SD-NEXT: ucvtf v17.2d, v3.2d
+; CHECK-SD-NEXT: ucvtf v5.2d, v5.2d
+; CHECK-SD-NEXT: ucvtf v7.2d, v7.2d
+; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-SD-NEXT: fcvtn v1.2s, v2.2d
+; CHECK-SD-NEXT: fcvtn v2.2s, v4.2d
+; CHECK-SD-NEXT: fcvtn v3.2s, v6.2d
+; CHECK-SD-NEXT: fcvtn2 v0.4s, v16.2d
+; CHECK-SD-NEXT: fcvtn2 v1.4s, v17.2d
+; CHECK-SD-NEXT: fcvtn2 v2.4s, v5.2d
+; CHECK-SD-NEXT: fcvtn2 v3.4s, v7.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: utofp_v16i64_v16f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x9, v0.d[1]
+; CHECK-GI-NEXT: fmov x13, d2
+; CHECK-GI-NEXT: fmov x11, d0
+; CHECK-GI-NEXT: mov x12, v4.d[1]
+; CHECK-GI-NEXT: mov x8, v1.d[1]
+; CHECK-GI-NEXT: mov x10, v2.d[1]
+; CHECK-GI-NEXT: ucvtf s0, x11
+; CHECK-GI-NEXT: mov x11, v6.d[1]
+; CHECK-GI-NEXT: ucvtf s16, x9
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: ucvtf s1, x13
+; CHECK-GI-NEXT: fmov x13, d4
+; CHECK-GI-NEXT: ucvtf s4, x12
+; CHECK-GI-NEXT: fmov x12, d6
+; CHECK-GI-NEXT: ucvtf s17, x10
+; CHECK-GI-NEXT: mov x10, v3.d[1]
+; CHECK-GI-NEXT: ucvtf s6, x11
+; CHECK-GI-NEXT: fmov x11, d5
+; CHECK-GI-NEXT: ucvtf s18, x9
+; CHECK-GI-NEXT: ucvtf s2, x13
+; CHECK-GI-NEXT: fmov x13, d3
+; CHECK-GI-NEXT: ucvtf s3, x12
+; CHECK-GI-NEXT: mov x9, v5.d[1]
+; CHECK-GI-NEXT: mov x12, v7.d[1]
+; CHECK-GI-NEXT: mov v0.s[1], v16.s[0]
+; CHECK-GI-NEXT: ucvtf s5, x11
+; CHECK-GI-NEXT: mov v1.s[1], v17.s[0]
+; CHECK-GI-NEXT: ucvtf s19, x13
+; CHECK-GI-NEXT: fmov x13, d7
+; CHECK-GI-NEXT: mov v2.s[1], v4.s[0]
+; CHECK-GI-NEXT: mov v3.s[1], v6.s[0]
+; CHECK-GI-NEXT: ucvtf s4, x8
+; CHECK-GI-NEXT: ucvtf s6, x10
+; CHECK-GI-NEXT: ucvtf s16, x9
+; CHECK-GI-NEXT: ucvtf s17, x12
+; CHECK-GI-NEXT: ucvtf s7, x13
+; CHECK-GI-NEXT: mov v0.s[2], v18.s[0]
+; CHECK-GI-NEXT: mov v1.s[2], v19.s[0]
+; CHECK-GI-NEXT: mov v2.s[2], v5.s[0]
+; CHECK-GI-NEXT: mov v3.s[2], v7.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v4.s[0]
+; CHECK-GI-NEXT: mov v1.s[3], v6.s[0]
+; CHECK-GI-NEXT: mov v2.s[3], v16.s[0]
+; CHECK-GI-NEXT: mov v3.s[3], v17.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = uitofp <16 x i64> %a to <16 x float>
ret <16 x float> %c
@@ -4683,42 +4853,110 @@ define <32 x float> @stofp_v32i64_v32f32(<32 x i64> %a) {
;
; CHECK-GI-LABEL: stofp_v32i64_v32f32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldp q16, q17, [sp]
-; CHECK-GI-NEXT: scvtf v0.2d, v0.2d
-; CHECK-GI-NEXT: ldp q18, q19, [sp, #32]
-; CHECK-GI-NEXT: scvtf v24.2d, v1.2d
-; CHECK-GI-NEXT: ldp q20, q21, [sp, #64]
-; CHECK-GI-NEXT: scvtf v1.2d, v2.2d
-; CHECK-GI-NEXT: ldp q22, q23, [sp, #96]
-; CHECK-GI-NEXT: scvtf v25.2d, v3.2d
-; CHECK-GI-NEXT: scvtf v2.2d, v4.2d
-; CHECK-GI-NEXT: scvtf v26.2d, v5.2d
-; CHECK-GI-NEXT: scvtf v3.2d, v6.2d
-; CHECK-GI-NEXT: scvtf v27.2d, v7.2d
-; CHECK-GI-NEXT: scvtf v4.2d, v16.2d
-; CHECK-GI-NEXT: scvtf v5.2d, v18.2d
-; CHECK-GI-NEXT: scvtf v6.2d, v20.2d
-; CHECK-GI-NEXT: scvtf v7.2d, v22.2d
-; CHECK-GI-NEXT: scvtf v16.2d, v17.2d
-; CHECK-GI-NEXT: scvtf v17.2d, v19.2d
-; CHECK-GI-NEXT: scvtf v18.2d, v21.2d
-; CHECK-GI-NEXT: scvtf v19.2d, v23.2d
-; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-GI-NEXT: fcvtn v1.2s, v1.2d
-; CHECK-GI-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-GI-NEXT: fcvtn v3.2s, v3.2d
-; CHECK-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/132703
More information about the llvm-commits
mailing list