[llvm] [AArch64][SDAG] Lower f16->s16 FP_TO_INT_SAT to *v1f16 (PR #154822)

Thu Aug 21 11:56:46 PDT 2025

https://github.com/mrkajetanp created https://github.com/llvm/llvm-project/pull/154822

Conversions from f16 to s16 performed by FP_TO_INT_SAT can be done directly within FPRs, e.g. `fcvtzs h0, h0`.
Generating this format reduces the number of instruction required for correct behaviour, as it sidesteps the issues with incorrect saturation that arise when using `fcvtzs w0, h0` for the same casts.

Related to https://github.com/llvm/llvm-project/issues/154343.

>From 54c8ac4ee3fdc10e2de5d3b65df7dcadf9ee0eda Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Thu, 21 Aug 2025 18:44:20 +0000
Subject: [PATCH] [AArch64][SDAG] Lower f16->s16 FP_TO_INT_SAT to *v1f16

Conversions from f16 to s16 performed by FP_TO_INT_SAT can be done
directly within FPRs, e.g. `fcvtzs h0, h0`.
Generating this format reduces the number of instruction required for
correct behaviour, as it sidesteps the issues with incorrect saturation
that arise when using `fcvtzs w0, h0` for the same casts.

Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
---
 .../lib/Target/AArch64/AArch64ISelLowering.cpp | 18 ++++++++++++++++++
 llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll |  9 ++-------
 llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll |  6 ++----
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d168cc8d1bd06..f7cb76ed64b13 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4912,6 +4912,24 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
   if (DstWidth < SatWidth)
     return SDValue();
 
+  if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
+    auto Opcode = (Op.getOpcode() == ISD::FP_TO_SINT_SAT)
+                      ? AArch64::FCVTZSv1f16
+                      : AArch64::FCVTZUv1f16;
+    auto Cvt = SDValue(DAG.getMachineNode(Opcode, DL, MVT::f16, SrcVal), 0);
+    auto Sign = DAG.getTargetConstant(-1, DL, MVT::i64);
+    auto Hsub = DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32);
+    auto SubregToReg =
+        SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, MVT::v8f16,
+                                   Sign, Cvt, Hsub),
+                0);
+    auto Ssub = DAG.getTargetConstant(AArch64::ssub, DL, MVT::i32);
+    auto Extract = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+                                              MVT::f32, SubregToReg, Ssub),
+                           0);
+    return DAG.getBitcast(MVT::i32, Extract);
+  }
+
   SDValue NativeCvt =
       DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
   SDValue Sat;
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index e3aef487890f9..a5f6ac628403c 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -670,13 +670,8 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
 ;
 ; CHECK-SD-FP16-LABEL: test_signed_i16_f16:
 ; CHECK-SD-FP16:       // %bb.0:
-; CHECK-SD-FP16-NEXT:    fcvtzs w8, h0
-; CHECK-SD-FP16-NEXT:    mov w9, #32767 // =0x7fff
-; CHECK-SD-FP16-NEXT:    cmp w8, w9
-; CHECK-SD-FP16-NEXT:    csel w8, w8, w9, lt
-; CHECK-SD-FP16-NEXT:    mov w9, #-32768 // =0xffff8000
-; CHECK-SD-FP16-NEXT:    cmn w8, #8, lsl #12 // =32768
-; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT:    fcvtzs h0, h0
+; CHECK-SD-FP16-NEXT:    fmov w0, s0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-CVT-LABEL: test_signed_i16_f16:
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 07e49e331415e..2613f8337a918 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -531,10 +531,8 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
 ;
 ; CHECK-SD-FP16-LABEL: test_unsigned_i16_f16:
 ; CHECK-SD-FP16:       // %bb.0:
-; CHECK-SD-FP16-NEXT:    fcvtzu w8, h0
-; CHECK-SD-FP16-NEXT:    mov w9, #65535 // =0xffff
-; CHECK-SD-FP16-NEXT:    cmp w8, w9
-; CHECK-SD-FP16-NEXT:    csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT:    fcvtzu h0, h0
+; CHECK-SD-FP16-NEXT:    fmov w0, s0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-CVT-LABEL: test_unsigned_i16_f16: