[llvm] [AArch64][SDAG] Lower f16->s16 FP_TO_INT_SAT to *v1f16 (PR #154822)
Kajetan Puchalski via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 27 16:14:06 PDT 2025
https://github.com/mrkajetanp updated https://github.com/llvm/llvm-project/pull/154822
>From 0d35d2d67f7339bf5512be7485837c7ab82c2fb2 Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Thu, 21 Aug 2025 18:44:20 +0000
Subject: [PATCH 1/4] [AArch64][SDAG] Lower f16->s16 FP_TO_INT_SAT to *v1f16
Conversions from f16 to s16 performed by FP_TO_INT_SAT can be done
directly within FPRs, e.g. `fcvtzs h0, h0`.
Generating this format reduces the number of instructions required for
correct behaviour, as it sidesteps the issues with incorrect saturation
that arise when using `fcvtzs w0, h0` for the same casts.
Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
---
.../lib/Target/AArch64/AArch64ISelLowering.cpp | 18 ++++++++++++++++++
llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll | 9 ++-------
llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll | 6 ++----
3 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b8335113e4687..663917d07b5fc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4911,6 +4911,24 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
if (DstWidth < SatWidth)
return SDValue();
+ if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
+ auto Opcode = (Op.getOpcode() == ISD::FP_TO_SINT_SAT)
+ ? AArch64::FCVTZSv1f16
+ : AArch64::FCVTZUv1f16;
+ auto Cvt = SDValue(DAG.getMachineNode(Opcode, DL, MVT::f16, SrcVal), 0);
+ auto Sign = DAG.getTargetConstant(-1, DL, MVT::i64);
+ auto Hsub = DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32);
+ auto SubregToReg =
+ SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, MVT::v8f16,
+ Sign, Cvt, Hsub),
+ 0);
+ auto Ssub = DAG.getTargetConstant(AArch64::ssub, DL, MVT::i32);
+ auto Extract = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::f32, SubregToReg, Ssub),
+ 0);
+ return DAG.getBitcast(MVT::i32, Extract);
+ }
+
SDValue NativeCvt =
DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
SDValue Sat;
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index e3aef487890f9..a5f6ac628403c 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -670,13 +670,8 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
;
; CHECK-SD-FP16-LABEL: test_signed_i16_f16:
; CHECK-SD-FP16: // %bb.0:
-; CHECK-SD-FP16-NEXT: fcvtzs w8, h0
-; CHECK-SD-FP16-NEXT: mov w9, #32767 // =0x7fff
-; CHECK-SD-FP16-NEXT: cmp w8, w9
-; CHECK-SD-FP16-NEXT: csel w8, w8, w9, lt
-; CHECK-SD-FP16-NEXT: mov w9, #-32768 // =0xffff8000
-; CHECK-SD-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-SD-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-SD-FP16-NEXT: fcvtzs h0, h0
+; CHECK-SD-FP16-NEXT: fmov w0, s0
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-CVT-LABEL: test_signed_i16_f16:
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 07e49e331415e..2613f8337a918 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -531,10 +531,8 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
;
; CHECK-SD-FP16-LABEL: test_unsigned_i16_f16:
; CHECK-SD-FP16: // %bb.0:
-; CHECK-SD-FP16-NEXT: fcvtzu w8, h0
-; CHECK-SD-FP16-NEXT: mov w9, #65535 // =0xffff
-; CHECK-SD-FP16-NEXT: cmp w8, w9
-; CHECK-SD-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-SD-FP16-NEXT: fcvtzu h0, h0
+; CHECK-SD-FP16-NEXT: fmov w0, s0
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-CVT-LABEL: test_unsigned_i16_f16:
>From eed4fd2ea55bad09db2a0eedab8e4de3a418a008 Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Tue, 26 Aug 2025 13:59:25 +0000
Subject: [PATCH 2/4] Move into AArch64ISD::FCVTZ[S|U]_HALF
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 28 +++++++++++++++++++
.../Target/AArch64/AArch64ISelLowering.cpp | 18 ++----------
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 3 ++
3 files changed, 34 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index bc786f415b554..ff1033563edf5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -424,6 +424,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
unsigned Opc_rr, unsigned Opc_ri);
+ void SelectFCVT_FPTOINT_Half(SDNode *N, unsigned Opc);
std::tuple<unsigned, SDValue, SDValue>
findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
const SDValue &OldBase, const SDValue &OldOffset,
@@ -2536,6 +2537,25 @@ void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
ReplaceNode(N, St);
}
+// Select f16 -> i16 conversions
+// Since i16 is an illegal type, they need to return an i32 result
+void AArch64DAGToDAGISel::SelectFCVT_FPTOINT_Half(SDNode *N, unsigned int Opc) {
+ SDLoc DL(N);
+ SDValue SrcVal = N->getOperand(0);
+ SDNode *Cvt = CurDAG->getMachineNode(Opc, DL, MVT::f16, SrcVal);
+ SDValue Sign = CurDAG->getTargetConstant(-1, DL, MVT::i64);
+ SDValue Hsub = CurDAG->getTargetConstant(AArch64::hsub, DL, MVT::i32);
+ SDNode *SubregToReg = CurDAG->getMachineNode(
+ TargetOpcode::SUBREG_TO_REG, DL, MVT::v8f16, Sign, SDValue(Cvt, 0), Hsub);
+ SDValue Ssub = CurDAG->getTargetConstant(AArch64::ssub, DL, MVT::i32);
+ SDNode *Extract =
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f32,
+ SDValue(SubregToReg, 0), Ssub);
+ SDNode *Result = CurDAG->getMachineNode(AArch64::FMOVSWr, DL, MVT::i32,
+ SDValue(Extract, 0));
+ ReplaceNode(N, Result);
+}
+
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
unsigned &Opc, SDValue &Opd0,
unsigned &LSB, unsigned &MSB,
@@ -7359,6 +7379,14 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case AArch64ISD::FCVTZS_HALF: {
+ SelectFCVT_FPTOINT_Half(Node, AArch64::FCVTZSv1f16);
+ return;
+ }
+ case AArch64ISD::FCVTZU_HALF: {
+ SelectFCVT_FPTOINT_Half(Node, AArch64::FCVTZUv1f16);
+ return;
+ }
}
// Select the default instruction
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 663917d07b5fc..041ad3e74915c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4912,21 +4912,9 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
return SDValue();
if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
- auto Opcode = (Op.getOpcode() == ISD::FP_TO_SINT_SAT)
- ? AArch64::FCVTZSv1f16
- : AArch64::FCVTZUv1f16;
- auto Cvt = SDValue(DAG.getMachineNode(Opcode, DL, MVT::f16, SrcVal), 0);
- auto Sign = DAG.getTargetConstant(-1, DL, MVT::i64);
- auto Hsub = DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32);
- auto SubregToReg =
- SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, MVT::v8f16,
- Sign, Cvt, Hsub),
- 0);
- auto Ssub = DAG.getTargetConstant(AArch64::ssub, DL, MVT::i32);
- auto Extract = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- MVT::f32, SubregToReg, Ssub),
- 0);
- return DAG.getBitcast(MVT::i32, Extract);
+ if (Op.getOpcode() == ISD::FP_TO_SINT_SAT)
+ return DAG.getNode(AArch64ISD::FCVTZS_HALF, DL, DstVT, SrcVal);
+ return DAG.getNode(AArch64ISD::FCVTZU_HALF, DL, DstVT, SrcVal);
}
SDValue NativeCvt =
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ad8556b824cda..13b8b6b330c75 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -989,6 +989,9 @@ def AArch64fcvtxnv: PatFrags<(ops node:$Rn),
[(int_aarch64_neon_fcvtxn node:$Rn),
(AArch64fcvtxn_n node:$Rn)]>;
+def AArch64fcvtzs_half : SDNode<"AArch64ISD::FCVTZS_HALF", SDTFPToIntOp>;
+def AArch64fcvtzu_half : SDNode<"AArch64ISD::FCVTZU_HALF", SDTFPToIntOp>;
+
//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
// Vector immediate ops
>From e0898a0610e9286988104a2e9728295795156cb6 Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Wed, 27 Aug 2025 12:10:38 +0000
Subject: [PATCH 3/4] Make new AArch64ISD nodes return f32
---
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 7 +++----
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 7 +++++--
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 ++--
3 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ff1033563edf5..1a90747274017 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2538,7 +2538,8 @@ void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
}
// Select f16 -> i16 conversions
-// Since i16 is an illegal type, they need to return an i32 result
+// Since i16 is an illegal type, we return the converted bit pattern in a f32
+// which can then be bitcast to i32 and truncated as needed.
void AArch64DAGToDAGISel::SelectFCVT_FPTOINT_Half(SDNode *N, unsigned int Opc) {
SDLoc DL(N);
SDValue SrcVal = N->getOperand(0);
@@ -2551,9 +2552,7 @@ void AArch64DAGToDAGISel::SelectFCVT_FPTOINT_Half(SDNode *N, unsigned int Opc) {
SDNode *Extract =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f32,
SDValue(SubregToReg, 0), Ssub);
- SDNode *Result = CurDAG->getMachineNode(AArch64::FMOVSWr, DL, MVT::i32,
- SDValue(Extract, 0));
- ReplaceNode(N, Result);
+ ReplaceNode(N, Extract);
}
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 041ad3e74915c..af0bc21d2fd6a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4912,9 +4912,12 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
return SDValue();
if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
+ SDValue CVTf32;
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT)
- return DAG.getNode(AArch64ISD::FCVTZS_HALF, DL, DstVT, SrcVal);
- return DAG.getNode(AArch64ISD::FCVTZU_HALF, DL, DstVT, SrcVal);
+ CVTf32 = DAG.getNode(AArch64ISD::FCVTZS_HALF, DL, MVT::f32, SrcVal);
+ else
+ CVTf32 = DAG.getNode(AArch64ISD::FCVTZU_HALF, DL, MVT::f32, SrcVal);
+ return DAG.getBitcast(DstVT, CVTf32);
}
SDValue NativeCvt =
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 13b8b6b330c75..01d867c8abab3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -989,8 +989,8 @@ def AArch64fcvtxnv: PatFrags<(ops node:$Rn),
[(int_aarch64_neon_fcvtxn node:$Rn),
(AArch64fcvtxn_n node:$Rn)]>;
-def AArch64fcvtzs_half : SDNode<"AArch64ISD::FCVTZS_HALF", SDTFPToIntOp>;
-def AArch64fcvtzu_half : SDNode<"AArch64ISD::FCVTZU_HALF", SDTFPToIntOp>;
+def AArch64fcvtzs_half : SDNode<"AArch64ISD::FCVTZS_HALF", SDTFPExtendOp>;
+def AArch64fcvtzu_half : SDNode<"AArch64ISD::FCVTZU_HALF", SDTFPExtendOp>;
//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
>From 604902c18427782dcbfb4065cbfef30b0d61f4db Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Wed, 27 Aug 2025 23:04:49 +0000
Subject: [PATCH 4/4] Move AArch64ISD selection into TableGen
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 27 -------------------
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 12 +++++++++
2 files changed, 12 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 1a90747274017..bc786f415b554 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -424,7 +424,6 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
unsigned Opc_rr, unsigned Opc_ri);
- void SelectFCVT_FPTOINT_Half(SDNode *N, unsigned Opc);
std::tuple<unsigned, SDValue, SDValue>
findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
const SDValue &OldBase, const SDValue &OldOffset,
@@ -2537,24 +2536,6 @@ void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
ReplaceNode(N, St);
}
-// Select f16 -> i16 conversions
-// Since i16 is an illegal type, we return the converted bit pattern in a f32
-// which can then be bitcast to i32 and truncated as needed.
-void AArch64DAGToDAGISel::SelectFCVT_FPTOINT_Half(SDNode *N, unsigned int Opc) {
- SDLoc DL(N);
- SDValue SrcVal = N->getOperand(0);
- SDNode *Cvt = CurDAG->getMachineNode(Opc, DL, MVT::f16, SrcVal);
- SDValue Sign = CurDAG->getTargetConstant(-1, DL, MVT::i64);
- SDValue Hsub = CurDAG->getTargetConstant(AArch64::hsub, DL, MVT::i32);
- SDNode *SubregToReg = CurDAG->getMachineNode(
- TargetOpcode::SUBREG_TO_REG, DL, MVT::v8f16, Sign, SDValue(Cvt, 0), Hsub);
- SDValue Ssub = CurDAG->getTargetConstant(AArch64::ssub, DL, MVT::i32);
- SDNode *Extract =
- CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f32,
- SDValue(SubregToReg, 0), Ssub);
- ReplaceNode(N, Extract);
-}
-
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
unsigned &Opc, SDValue &Opd0,
unsigned &LSB, unsigned &MSB,
@@ -7378,14 +7359,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
- case AArch64ISD::FCVTZS_HALF: {
- SelectFCVT_FPTOINT_Half(Node, AArch64::FCVTZSv1f16);
- return;
- }
- case AArch64ISD::FCVTZU_HALF: {
- SelectFCVT_FPTOINT_Half(Node, AArch64::FCVTZUv1f16);
- return;
- }
}
// Select the default instruction
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 01d867c8abab3..ba33c585912fb 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6542,6 +6542,18 @@ defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
(CMLTv1i64rz V64:$Rn)>;
+// f16 -> s16 conversions leave the bit pattern in a f32
+class F16ToS16ScalarPat<SDNode cvt_isd, BaseSIMDTwoScalar instr>
+ : Pat<(f32 (cvt_isd (f16 FPR16:$Rn))),
+ (EXTRACT_SUBREG
+ (v8f16 (SUBREG_TO_REG (i64 0), (instr FPR16:$Rn), hsub)),
+ ssub)>;
+
+let Predicates = [HasFullFP16] in {
+def : F16ToS16ScalarPat<AArch64fcvtzs_half, FCVTZSv1f16>;
+def : F16ToS16ScalarPat<AArch64fcvtzu_half, FCVTZUv1f16>;
+}
+
// Round FP64 to BF16.
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in
def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))),
More information about the llvm-commits
mailing list