[llvm] [AArch64] Lower aarch64.neon.fcvtzs.i16.f16 to FP_TO_SINT_SAT (PR #154344)
Kajetan Puchalski via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 19 07:16:54 PDT 2025
https://github.com/mrkajetanp created https://github.com/llvm/llvm-project/pull/154344
FP_TO_SINT_SAT is capable of correctly handling an f16 -> s16 conversion, including correct overflow behaviour. The semantics of the operation match those of the vcvth_s16_f16 NEON intrinsic. Enable correct lowering of aarch64.neon.fcvtzs.i16.f16 by making use of it.
Part of a solution to https://github.com/llvm/llvm-project/issues/154343.
>From 0c9e5f8c4934c00340006865d79aeb931803181f Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Tue, 19 Aug 2025 14:06:02 +0000
Subject: [PATCH] [AArch64] Lower aarch64.neon.fcvtzs.i16.f16 to FP_TO_SINT_SAT
FP_TO_SINT_SAT is capable of correctly handling an f16 -> s16 conversion,
including correct overflow behaviour. The semantics of the operation
match those of the vcvth_s16_f16 NEON intrinsic. Enable correct lowering
of aarch64.neon.fcvtzs.i16.f16 by making use of it.
Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
---
.../Target/AArch64/AArch64ISelLowering.cpp | 13 ++++++++++++
.../GISel/AArch64InstructionSelector.cpp | 17 ++++++++++++++++
.../AArch64/fp16_s16_intrinsic_scalar.ll | 20 +++++++++++++++++++
3 files changed, 50 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index aefbbe2534be2..9342fbbc0e011 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1289,6 +1289,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
+
+ // f16 -> i16 conversion intrinsics need custom lowering
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
} else {
// when AArch64 doesn't have fullfp16 support, promote the input
// to i32 first.
@@ -28238,6 +28241,16 @@ void AArch64TargetLowering::ReplaceNodeResults(
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
+ case Intrinsic::aarch64_neon_fcvtzs: {
+ if (VT.getScalarType() != MVT::i16)
+ return;
+
+ SDLoc DL(N);
+ auto CVT = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, VT,
+ N->getOperand(1), DAG.getValueType(MVT::i16));
+ Results.push_back(CVT);
+ return;
+ }
}
}
case ISD::READ_REGISTER: {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index ee34a85a5b507..16fe6ec176e53 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2278,6 +2278,23 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
}
return false;
}
+ case TargetOpcode::G_INTRINSIC: {
+ unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
+ switch (IntrinID) {
+ default:
+ break;
+ case Intrinsic::aarch64_neon_fcvtzs: {
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ if (DstTy != LLT::scalar(16))
+ return false;
+ // Remove the no longer needed intrinsic ID operand
+ I.removeOperand(1);
+ I.setDesc(TII.get(TargetOpcode::G_FPTOSI_SAT));
+ return true;
+ }
+ }
+ return false;
+ }
default:
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll b/llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll
new file mode 100644
index 0000000000000..955ee2e4b319f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll
@@ -0,0 +1,20 @@
+; Test fp16 -> s16 conversion intrinsics which require special handling to ensure correct behaviour.
+; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK-SD
+
+declare i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half)
+
+define i16 @fcvtzs_intrinsic_i16(half %a) {
+; CHECK-SD-LABEL: fcvtzs_intrinsic_i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcvtzs w8, h0
+; CHECK-SD-NEXT: mov w9, #32767
+; CHECK-SD-NEXT: cmp w8, w9
+; CHECK-SD-NEXT: csel w8, w8, w9, lt
+; CHECK-SD-NEXT: mov w9, #-32768
+; CHECK-SD-NEXT: cmn w8, #8, lsl #12
+; CHECK-SD-NEXT: csel
+; CHECK-SD-NEXT: ret
+entry:
+ %fcvt = tail call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+ ret i16 %fcvt
+}
More information about the llvm-commits
mailing list