[llvm-branch-commits] [llvm] release/22.x: [SelectionDAG] Fix fptoui.sat expansion using minnum/maxnum (#180178) (PR #184565)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Mar 4 00:39:16 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: None (llvmbot)
<details>
<summary>Changes</summary>
Backport cfca635efc11e4de10d560810fd4837510953c05
Requested by: @<!-- -->wangleiat
---
Patch is 174.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/184565.diff
12 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+28-10)
- (modified) llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll (+93-145)
- (modified) llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll (+107-112)
- (modified) llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll (+84-142)
- (modified) llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll (+212-132)
- (added) llvm/test/CodeGen/Hexagon/fptoi.sat.ll (+247)
- (modified) llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll (+566-306)
- (modified) llvm/test/CodeGen/X86/fpclamptosat.ll (+8-16)
- (modified) llvm/test/CodeGen/X86/fptosi-sat-scalar.ll (+57-100)
- (modified) llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll (+114-177)
- (modified) llvm/test/CodeGen/X86/fptoui-sat-scalar.ll (+55-107)
- (modified) llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll (+86-152)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ee489f9fc74f9..8f4e716c1d665 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11868,22 +11868,27 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
// If the integer bounds are exactly representable as floats and min/max are
// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
// of comparisons and selects.
- bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
- isOperationLegal(ISD::FMAXNUM, SrcVT);
- if (AreExactFloatBounds && MinMaxLegal) {
+ auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
+ bool MayPropagateNaN) {
+ bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
+ isOperationLegalOrCustom(MaxOpcode, SrcVT);
+ if (!MinMaxLegal)
+ return SDValue();
+
SDValue Clamped = Src;
- // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
- Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
- // Clamp by MaxFloat from above. NaN cannot occur.
- Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
+ // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
+ // then the result is MinFloat.
+ Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
+ // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
+ Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
dl, DstVT, Clamped);
- // In the unsigned case we're done, because we mapped NaN to MinFloat,
- // which will cast to zero.
- if (!IsSigned)
+    // If !MayPropagateNaN and the conversion is unsigned, we're done,
+ // because we mapped NaN to MinFloat, which will cast to zero.
+ if (!MayPropagateNaN && !IsSigned)
return FpToInt;
// Otherwise, select 0 if Src is NaN.
@@ -11892,6 +11897,19 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
+ };
+ if (AreExactFloatBounds) {
+ if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
+ /*MayPropagateNaN=*/false))
+ return Res;
+ // These may propagate NaN for sNaN operands.
+ if (SDValue Res =
+ EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
+ return Res;
+ // These always propagate NaN.
+ if (SDValue Res =
+ EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
+ return Res;
}
SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
index 897ade00320db..e9db8434869bb 100644
--- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -86,18 +86,16 @@ define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-956301312 // =0xc7000000
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mov w8, #65024 // =0xfe00
; CHECK-NEXT: movk w8, #18175, lsl #16
-; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z1.s, w8
-; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z1.s, #32767 // =0x7fff
-; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.s
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s
; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
-; CHECK-NEXT: sel z0.s, p2, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
ret <vscale x 4 x i16> %x
@@ -108,26 +106,22 @@ define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-956301312 // =0xc7000000
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z3.s, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: mov w8, #65024 // =0xfe00
-; CHECK-NEXT: mov z5.s, #32767 // =0x7fff
; CHECK-NEXT: movk w8, #18175, lsl #16
; CHECK-NEXT: mov z4.s, w8
-; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s
-; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z4.s
-; CHECK-NEXT: fcvtzs z3.s, p1/m, z1.s
-; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z4.s
-; CHECK-NEXT: fcvtzs z2.s, p2/m, z0.s
-; CHECK-NEXT: fcmuo p2.s, p0/z, z1.s, z1.s
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fmaxnm z3.s, p0/m, z3.s, z2.s
+; CHECK-NEXT: fmaxnm z2.s, p0/m, z2.s, z0.s
+; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s
+; CHECK-NEXT: fminnm z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: fminnm z2.s, p0/m, z2.s, z4.s
+; CHECK-NEXT: fcvtzs z3.s, p0/m, z3.s
+; CHECK-NEXT: fcvtzs z2.s, p0/m, z2.s
; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
-; CHECK-NEXT: sel z0.s, p1, z5.s, z3.s
-; CHECK-NEXT: sel z1.s, p3, z5.s, z2.s
-; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
-; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.h, z2.h, z3.h
; CHECK-NEXT: ret
%x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
ret <vscale x 8 x i16> %x
@@ -203,14 +197,12 @@ define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000
; CHECK-NEXT: movk x8, #16863, lsl #48
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z1.d, #0xffffffff80000000
-; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z2.d
-; CHECK-NEXT: mov z2.d, #0x7fffffff
+; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.d
-; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
ret <vscale x 2 x i32> %x
@@ -221,26 +213,22 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z3.d, #0xffffffff80000000
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000
-; CHECK-NEXT: mov z4.d, #0xffffffff80000000
; CHECK-NEXT: movk x8, #16863, lsl #48
-; CHECK-NEXT: mov z5.d, x8
-; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z2.d
-; CHECK-NEXT: mov z2.d, #0x7fffffff
-; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z5.d
-; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z5.d
-; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.d
-; CHECK-NEXT: fcmuo p2.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fmaxnm z3.d, p0/m, z3.d, z2.d
+; CHECK-NEXT: fmaxnm z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fminnm z3.d, p0/m, z3.d, z4.d
+; CHECK-NEXT: fminnm z2.d, p0/m, z2.d, z4.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d
-; CHECK-NEXT: sel z1.d, p3, z2.d, z4.d
-; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
-; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z2.s, z3.s
; CHECK-NEXT: ret
%x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
ret <vscale x 4 x i32> %x
@@ -249,55 +237,37 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
; CHECK-LABEL: test_signed_v8f64_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill
-; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z5.d, #0xffffffff80000000
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000
-; CHECK-NEXT: mov z6.d, #0xffffffff80000000
; CHECK-NEXT: movk x8, #16863, lsl #48
-; CHECK-NEXT: mov z7.d, #0xffffffff80000000
-; CHECK-NEXT: mov z25.d, #0x7fffffff
-; CHECK-NEXT: mov z24.d, x8
-; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z4.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d
-; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, z4.d
-; CHECK-NEXT: fcmgt p4.d, p0/z, z0.d, z24.d
-; CHECK-NEXT: fcvtzs z5.d, p1/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z24.d
-; CHECK-NEXT: fcvtzs z6.d, p2/m, z0.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d
-; CHECK-NEXT: mov z4.d, #0xffffffff80000000
-; CHECK-NEXT: fcvtzs z7.d, p3/m, z3.d
-; CHECK-NEXT: fcmuo p3.d, p0/z, z1.d, z1.d
-; CHECK-NEXT: fcmgt p5.d, p0/z, z2.d, z24.d
-; CHECK-NEXT: sel z1.d, p1, z25.d, z5.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z24.d
-; CHECK-NEXT: fcvtzs z4.d, p2/m, z2.d
+; CHECK-NEXT: mov z5.d, x8
+; CHECK-NEXT: movprfx z6, z1
+; CHECK-NEXT: fmaxnm z6.d, p0/m, z6.d, z4.d
+; CHECK-NEXT: movprfx z7, z0
+; CHECK-NEXT: fmaxnm z7.d, p0/m, z7.d, z4.d
+; CHECK-NEXT: movprfx z24, z3
+; CHECK-NEXT: fmaxnm z24.d, p0/m, z24.d, z4.d
+; CHECK-NEXT: fmaxnm z4.d, p0/m, z4.d, z2.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
; CHECK-NEXT: fcmuo p2.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: sel z0.d, p4, z25.d, z6.d
-; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0
-; CHECK-NEXT: fcmuo p6.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: fminnm z6.d, p0/m, z6.d, z5.d
+; CHECK-NEXT: fminnm z7.d, p0/m, z7.d, z5.d
+; CHECK-NEXT: fminnm z24.d, p0/m, z24.d, z5.d
+; CHECK-NEXT: fminnm z4.d, p0/m, z4.d, z5.d
+; CHECK-NEXT: fcmuo p3.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z2.d, z2.d
-; CHECK-NEXT: sel z2.d, p1, z25.d, z7.d
-; CHECK-NEXT: sel z3.d, p5, z25.d, z4.d
-; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload
-; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z2.d, p6/m, #0 // =0x0
-; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload
-; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0
-; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.s, z3.s, z2.s
-; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z7.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z24.d, p3/m, #0 // =0x0
+; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z7.s, z6.s
+; CHECK-NEXT: uzp1 z1.s, z4.s, z24.s
; CHECK-NEXT: ret
%x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
ret <vscale x 8 x i32> %x
@@ -308,26 +278,22 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z3.d, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000
-; CHECK-NEXT: mov z5.d, #32767 // =0x7fff
; CHECK-NEXT: movk x8, #16607, lsl #48
; CHECK-NEXT: mov z4.d, x8
-; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z2.d
-; CHECK-NEXT: mov z2.d, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z4.d
-; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d
-; CHECK-NEXT: fcvtzs z2.d, p2/m, z0.d
-; CHECK-NEXT: fcmuo p2.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fmaxnm z3.d, p0/m, z3.d, z2.d
+; CHECK-NEXT: fmaxnm z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fminnm z3.d, p0/m, z3.d, z4.d
+; CHECK-NEXT: fminnm z2.d, p0/m, z2.d, z4.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: sel z0.d, p1, z5.d, z3.d
-; CHECK-NEXT: sel z1.d, p3, z5.d, z2.d
-; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
-; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z2.s, z3.s
; CHECK-NEXT: ret
%x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
ret <vscale x 4 x i16> %x
@@ -336,56 +302,38 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
; CHECK-LABEL: test_signed_v8f64_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill
-; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z5.d, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000
-; CHECK-NEXT: mov z7.d, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: movk x8, #16607, lsl #48
-; CHECK-NEXT: mov z24.d, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: mov z25.d, #32767 // =0x7fff
-; CHECK-NEXT: mov z6.d, x8
-; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, z4.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d
-; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, z4.d
-; CHECK-NEXT: fcmgt p4.d, p0/z, z2.d, z6.d
-; CHECK-NEXT: fcvtzs z5.d, p1/m, z3.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z6.d
-; CHECK-NEXT: fcvtzs z7.d, p2/m, z2.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d
-; CHECK-NEXT: fcvtzs z24.d, p3/m, z1.d
-; CHECK-NEXT: fcmuo p3.d, p0/z, z3.d, z3.d
-; CHECK-NEXT: mov z3.d, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: fcmgt p5.d, p0/z, z0.d, z6.d
-; CHECK-NEXT: sel z4.d, p1, z25.d, z5.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z6.d
-; CHECK-NEXT: fcvtzs z3.d, p2/m, z0.d
+; CHECK-NEXT: mov z5.d, x8
+; CHECK-NEXT: movprfx z6, z3
+; CHECK-NEXT: fmaxnm z6.d, p0/m, z6.d, z4.d
+; CHECK-NEXT: movprfx z7, z2
+; CHECK-NEXT: fmaxnm z7.d, p0/m, z7.d, z4.d
+; CHECK-NEXT: movprfx z24, z1
+; CHECK-NEXT: fmaxnm z24.d, p0/m, z24.d, z4.d
+; CHECK-NEXT: fmaxnm z4.d, p0/m, z4.d, z0.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d
; CHECK-NEXT: fcmuo p2.d, p0/z, z2.d, z2.d
-; CHECK-NEXT: fcmuo p6.d, p0/z, z1.d, z1.d
-; CHECK-NEXT: mov z4.d, p3/m, #0 // =0x0
+; CHECK-NEXT: fminnm z6.d, p0/m, z6.d, z5.d
+; CHECK-NEXT: fminnm z7.d, p0/m, z7.d, z5.d
+; CHECK-NEXT: fminnm z24.d, p0/m, z24.d, z5.d
+; CHECK-NEXT: fminnm z4.d, p0/m, z4.d, z5.d
+; CHECK-NEXT: fcmuo p3.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: sel z0.d, p4, z25.d, z7.d
-; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT: sel z1.d, p1, z25.d, z24.d
-; CHECK-NEXT: sel z2.d, p5, z25.d, z3.d
-; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload
-; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0
-; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload
-; CHECK-NEXT: uzp1 z0.s, z0.s, z4.s
-; CHECK-NEXT: uzp1 z1.s, z2.s, z1.s
+; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z7.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z24.d, p3/m, #0 // =0x0
+; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z7.s, z6.s
+; CHECK-NEXT: uzp1 z1.s, z4.s, z24.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
-; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
ret <vscale x 8 x i16> %x
diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
index fe398921cf1b5..61620fbdf0826 100644
--- a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
@@ -73,14 +73,15 @@ define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #65280 // =0xff00
-; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: movk w8, #18303, lsl #16
-; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, #0.0
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
-; CHECK-NEXT: fcvtzu z1.s, p1/m, z0.s
-; CHECK-NEXT: mov z0.s, #65535 // =0xffff
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s
+; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
ret <vscale x 4 x i16> %x
@@ -91,20 +92,21 @@ define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #65280 // =0xff00
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: movk w8, #18303, lsl #16
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fmaxnm z2.s, p0/m, z2.s, #0.0
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: fmaxnm z3.s, p0/m, z3.s, #0.0
; CHECK-NEXT: mov z4.s, w8
-; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fcvtzu z2.s, p1/m, z1.s
-; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z4.s
-; CHECK-NEXT: mov z1.s, #65535 // =0xffff
-; CHECK-NEXT: fcvtzu z3.s, p2/m, z0.s
-; CHECK-NE...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/184565
More information about the llvm-branch-commits
mailing list