[llvm] 6dd67f8 - [AArch64][SDAG] Lower f16->s16 FP_TO_INT_SAT to *v1f16 (#154822)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 28 07:19:37 PDT 2025
Author: Kajetan Puchalski
Date: 2025-08-28T15:19:34+01:00
New Revision: 6dd67f8c8a0ff5d8b46a69f2316c41bb11536881
URL: https://github.com/llvm/llvm-project/commit/6dd67f8c8a0ff5d8b46a69f2316c41bb11536881
DIFF: https://github.com/llvm/llvm-project/commit/6dd67f8c8a0ff5d8b46a69f2316c41bb11536881.diff
LOG: [AArch64][SDAG] Lower f16->s16 FP_TO_INT_SAT to *v1f16 (#154822)
Conversions from f16 to s16 performed by FP_TO_INT_SAT can be done
directly within FPRs, e.g. `fcvtzs h0, h0`.
Generating this format reduces the number of instructions required for
correct behaviour, as it sidesteps the issues with incorrect saturation
that arise when using `fcvtzs w0, h0` for the same casts.
Add new AArch64ISD::FCVTZS_HALF and AArch64ISD::FCVTZU_HALF nodes to
represent the necessary instruction sequence.
Related to https://github.com/llvm/llvm-project/issues/154343.
---------
Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7bea2dcec5891..23328ed57fb36 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4935,6 +4935,18 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
if (DstWidth < SatWidth)
return SDValue();
+ if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
+ if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
+ SDValue CVTf32 =
+ DAG.getNode(AArch64ISD::FCVTZS_HALF, DL, MVT::f32, SrcVal);
+ SDValue Bitcast = DAG.getBitcast(DstVT, CVTf32);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, Bitcast,
+ DAG.getValueType(SatVT));
+ }
+ SDValue CVTf32 = DAG.getNode(AArch64ISD::FCVTZU_HALF, DL, MVT::f32, SrcVal);
+ return DAG.getBitcast(DstVT, CVTf32);
+ }
+
SDValue NativeCvt =
DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
SDValue Sat;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ad8556b824cda..07c07008c0e05 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -989,6 +989,9 @@ def AArch64fcvtxnv: PatFrags<(ops node:$Rn),
[(int_aarch64_neon_fcvtxn node:$Rn),
(AArch64fcvtxn_n node:$Rn)]>;
+def AArch64fcvtzs_half : SDNode<"AArch64ISD::FCVTZS_HALF", SDTFPExtendOp>;
+def AArch64fcvtzu_half : SDNode<"AArch64ISD::FCVTZU_HALF", SDTFPExtendOp>;
+
//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
// Vector immediate ops
@@ -6539,6 +6542,16 @@ defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
(CMLTv1i64rz V64:$Rn)>;
+// f16 -> i16 conversions leave the bit pattern in a f32
+class F16ToI16ScalarPat<SDNode cvt_isd, BaseSIMDTwoScalar instr>
+ : Pat<(f32 (cvt_isd (f16 FPR16:$Rn))),
+ (f32 (SUBREG_TO_REG (i64 0), (instr FPR16:$Rn), hsub))>;
+
+let Predicates = [HasFullFP16] in {
+def : F16ToI16ScalarPat<AArch64fcvtzs_half, FCVTZSv1f16>;
+def : F16ToI16ScalarPat<AArch64fcvtzu_half, FCVTZUv1f16>;
+}
+
// Round FP64 to BF16.
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in
def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))),
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index e3aef487890f9..83a1893cdbc75 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -670,13 +670,9 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
;
; CHECK-SD-FP16-LABEL: test_signed_i16_f16:
; CHECK-SD-FP16: // %bb.0:
-; CHECK-SD-FP16-NEXT: fcvtzs w8, h0
-; CHECK-SD-FP16-NEXT: mov w9, #32767 // =0x7fff
-; CHECK-SD-FP16-NEXT: cmp w8, w9
-; CHECK-SD-FP16-NEXT: csel w8, w8, w9, lt
-; CHECK-SD-FP16-NEXT: mov w9, #-32768 // =0xffff8000
-; CHECK-SD-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-SD-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT: fcvtzs h0, h0
+; CHECK-SD-FP16-NEXT: fmov w8, s0
+; CHECK-SD-FP16-NEXT: sxth w0, w8
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-CVT-LABEL: test_signed_i16_f16:
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 07e49e331415e..2613f8337a918 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -531,10 +531,8 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
;
; CHECK-SD-FP16-LABEL: test_unsigned_i16_f16:
; CHECK-SD-FP16: // %bb.0:
-; CHECK-SD-FP16-NEXT: fcvtzu w8, h0
-; CHECK-SD-FP16-NEXT: mov w9, #65535 // =0xffff
-; CHECK-SD-FP16-NEXT: cmp w8, w9
-; CHECK-SD-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT: fcvtzu h0, h0
+; CHECK-SD-FP16-NEXT: fmov w0, s0
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-CVT-LABEL: test_unsigned_i16_f16:
More information about the llvm-commits
mailing list