[llvm] [AArch64] Lower aarch64.neon.fcvtzs.i16.f16 to FP_TO_SINT_SAT (PR #154344)

Tue Aug 19 07:16:54 PDT 2025

https://github.com/mrkajetanp created https://github.com/llvm/llvm-project/pull/154344

FP_TO_SINT_SAT is capable of correctly handling an f16 -> s16 conversion, including correct overflow behaviour. The semantics of the operation match those of the vcvth_s16_f16 NEON intrinsic. Enable correct lowering of aarch64.neon.fcvtzs.i16.f16 by making use of it.

Part of a solution to https://github.com/llvm/llvm-project/issues/154343. 

>From 0c9e5f8c4934c00340006865d79aeb931803181f Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Tue, 19 Aug 2025 14:06:02 +0000
Subject: [PATCH] [AArch64] Lower aarch64.neon.fcvtzs.i16.f16 to FP_TO_SINT_SAT

FP_TO_SINT_SAT is capable of correctly handling an f16 -> s16 conversion,
including correct overflow behaviour. The semantics of the operation
match those of the vcvth_s16_f16 NEON intrinsic. Enable correct lowering
of aarch64.neon.fcvtzs.i16.f16 by making use of it.

Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 13 ++++++++++++
 .../GISel/AArch64InstructionSelector.cpp      | 17 ++++++++++++++++
 .../AArch64/fp16_s16_intrinsic_scalar.ll      | 20 +++++++++++++++++++
 3 files changed, 50 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index aefbbe2534be2..9342fbbc0e011 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1289,6 +1289,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
       setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
       setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
+
+      // f16 -> i16 conversion intrinsics need custom lowering
+      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
     } else {
       // when AArch64 doesn't have fullfp16 support, promote the input
       // to i32 first.
@@ -28238,6 +28241,16 @@ void AArch64TargetLowering::ReplaceNodeResults(
       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
       return;
     }
+    case Intrinsic::aarch64_neon_fcvtzs: {
+      if (VT.getScalarType() != MVT::i16)
+        return;
+
+      SDLoc DL(N);
+      auto CVT = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, VT,
+          N->getOperand(1), DAG.getValueType(MVT::i16));
+      Results.push_back(CVT);
+      return;
+    }
     }
   }
   case ISD::READ_REGISTER: {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index ee34a85a5b507..16fe6ec176e53 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2278,6 +2278,23 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
     }
     return false;
   }
+  case TargetOpcode::G_INTRINSIC: {
+    unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
+    switch (IntrinID) {
+      default:
+        break;
+      case Intrinsic::aarch64_neon_fcvtzs: {
+        const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+        if (DstTy != LLT::scalar(16))
+          return false;
+        // Remove the no longer needed intrinsic ID operand
+        I.removeOperand(1);
+        I.setDesc(TII.get(TargetOpcode::G_FPTOSI_SAT));
+        return true;
+      }
+    }
+    return false;
+  }
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll b/llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll
new file mode 100644
index 0000000000000..955ee2e4b319f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll
@@ -0,0 +1,20 @@
+; Test fp16 -> s16 conversion intrinsics which require special handling to ensure correct behaviour.
+; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16  | FileCheck %s --check-prefixes=CHECK-SD
+
+declare i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half)
+
+define i16 @fcvtzs_intrinsic_i16(half %a) {
+; CHECK-SD-LABEL: fcvtzs_intrinsic_i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fcvtzs w8, h0
+; CHECK-SD-NEXT:    mov w9, #32767
+; CHECK-SD-NEXT:    cmp w8, w9
+; CHECK-SD-NEXT:    csel w8, w8, w9, lt
+; CHECK-SD-NEXT:    mov w9, #-32768
+; CHECK-SD-NEXT:    cmn w8, #8, lsl #12
+; CHECK-SD-NEXT:    csel
+; CHECK-SD-NEXT:    ret
+entry:
+  %fcvt = tail call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+  ret i16 %fcvt
+}