[llvm] da4cbec - [LLVM][SVE] Implement isel for fptoi half/float/double to i1. (#129269)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 4 03:34:48 PST 2025
Author: Paul Walker
Date: 2025-03-04T11:34:44Z
New Revision: da4cbeca6c5e147e7a3e19f1020befa8b119985e
URL: https://github.com/llvm/llvm-project/commit/da4cbeca6c5e147e7a3e19f1020befa8b119985e
DIFF: https://github.com/llvm/llvm-project/commit/da4cbeca6c5e147e7a3e19f1020befa8b119985e.diff
LOG: [LLVM][SVE] Implement isel for fptoi half/float/double to i1. (#129269)
Also adds an assert that SVE support for strict_fp fp<->int operations
is missing.
The added costs are to maintain the existing values expected by
Analysis/CostModel/AArch64/sve-cast.ll.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/CodeGen/AArch64/sve-fcvt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b4bebc61f5dbe..32f2f5de060d2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1569,6 +1569,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
}
@@ -4728,7 +4730,18 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
EVT VT = Op.getValueType();
+ assert(!(IsStrict && VT.isScalableVector()) &&
+ "Unimplemented SVE support for STRICT_FP_to_INT!");
+
if (VT.isScalableVector()) {
+ if (VT.getVectorElementType() == MVT::i1) {
+ SDLoc DL(Op);
+ EVT CvtVT = getPromotedVTForPredicate(VT);
+ SDValue Cvt = DAG.getNode(Op.getOpcode(), DL, CvtVT, Op.getOperand(0));
+ SDValue Zero = DAG.getConstant(0, DL, CvtVT);
+ return DAG.getSetCC(DL, VT, Cvt, Zero, ISD::SETNE);
+ }
+
unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
? AArch64ISD::FCVTZU_MERGE_PASSTHRU
: AArch64ISD::FCVTZS_MERGE_PASSTHRU;
@@ -5034,6 +5047,9 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
unsigned Opc = Op.getOpcode();
bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
+ assert(!(IsStrict && VT.isScalableVector()) &&
"Unimplemented SVE support for ISD::STRICT_INT_TO_FP!");
+
if (VT.isScalableVector()) {
if (InVT.getVectorElementType() == MVT::i1) {
SDValue FalseVal = DAG.getConstantFP(0.0, dl, VT);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 65cd64a85150a..09a0e7ec172bf 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3017,20 +3017,24 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 1},
{ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 1},
{ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f64, 1},
+ {ISD::FP_TO_SINT, MVT::nxv2i1, MVT::nxv2f64, 1},
{ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1},
{ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 1},
{ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 1},
{ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f64, 1},
+ {ISD::FP_TO_UINT, MVT::nxv2i1, MVT::nxv2f64, 1},
// Complex, from nxv4f32.
{ISD::FP_TO_SINT, MVT::nxv4i64, MVT::nxv4f32, 4},
{ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1},
{ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 1},
{ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f32, 1},
+ {ISD::FP_TO_SINT, MVT::nxv4i1, MVT::nxv4f32, 1},
{ISD::FP_TO_UINT, MVT::nxv4i64, MVT::nxv4f32, 4},
{ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1},
{ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 1},
{ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f32, 1},
+ {ISD::FP_TO_UINT, MVT::nxv4i1, MVT::nxv4f32, 1},
// Complex, from nxv8f64. Illegal -> illegal conversions not required.
{ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 7},
@@ -3057,10 +3061,12 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f16, 4},
{ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f16, 1},
{ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f16, 1},
+ {ISD::FP_TO_SINT, MVT::nxv8i1, MVT::nxv8f16, 1},
{ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f16, 10},
{ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f16, 4},
{ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f16, 1},
{ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f16, 1},
+ {ISD::FP_TO_UINT, MVT::nxv8i1, MVT::nxv8f16, 1},
// Complex, from nxv4f16.
{ISD::FP_TO_SINT, MVT::nxv4i64, MVT::nxv4f16, 4},
diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
index a6749984af427..8b8ddb624a040 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
@@ -113,6 +113,120 @@ define <vscale x 2 x float> @fcvts_nxv2f64(<vscale x 2 x double> %a) {
; FP_TO_SINT
;
+define <vscale x 2 x i1> @fcvtzs_nxv2f16_to_nxv2i1(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcvtzs_nxv2f16_to_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 2 x half> %a to <vscale x 2 x i1>
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @fcvtzs_nxv2f32_to_nxv2i1(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fcvtzs_nxv2f32_to_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 2 x float> %a to <vscale x 2 x i1>
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @fcvtzs_nxv2f64_to_nxv2i1(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fcvtzs_nxv2f64_to_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 2 x double> %a to <vscale x 2 x i1>
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzs_nxv4f16_to_nxv4i1(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fcvtzs_nxv4f16_to_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 4 x half> %a to <vscale x 4 x i1>
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzs_nxv4f32_to_nxv4i1(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fcvtzs_nxv4f32_to_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i1>
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzs_nxv4f64_to_nxv4i1(<vscale x 4 x double> %a) {
+; CHECK-LABEL: fcvtzs_nxv4f64_to_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 4 x double> %a to <vscale x 4 x i1>
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzs_nxv8f16_to_nxv8i1(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fcvtzs_nxv8f16_to_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
+; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 8 x half> %a to <vscale x 8 x i1>
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzs_nxv8f32_to_nxv8i1(<vscale x 8 x float> %a) {
+; CHECK-LABEL: fcvtzs_nxv8f32_to_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 8 x float> %a to <vscale x 8 x i1>
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzs_nxv8f64_to_nxv8i1(<vscale x 8 x double> %a) {
+; CHECK-LABEL: fcvtzs_nxv8f64_to_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
+; CHECK-NEXT: cmpne p3.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s
+; CHECK-NEXT: uzp1 p0.s, p0.s, p3.s
+; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i1>
+ ret <vscale x 8 x i1> %res
+}
+
define <vscale x 2 x i16> @fcvtzs_h_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fcvtzs_h_nxv2f16:
; CHECK: // %bb.0:
@@ -277,6 +391,120 @@ define <vscale x 2 x i64> @fcvtzs_d_nxv2f64(<vscale x 2 x double> %a) {
; FP_TO_UINT
;
+define <vscale x 2 x i1> @fcvtzu_nxv2f16_to_nxv2i1(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcvtzu_nxv2f16_to_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 2 x half> %a to <vscale x 2 x i1>
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @fcvtzu_nxv2f32_to_nxv2i1(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fcvtzu_nxv2f32_to_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 2 x float> %a to <vscale x 2 x i1>
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @fcvtzu_nxv2f64_to_nxv2i1(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fcvtzu_nxv2f64_to_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 2 x double> %a to <vscale x 2 x i1>
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzu_nxv4f16_to_nxv4i1(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fcvtzu_nxv4f16_to_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 4 x half> %a to <vscale x 4 x i1>
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzu_nxv4f32_to_nxv4i1(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fcvtzu_nxv4f32_to_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i1>
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzu_nxv4f64_to_nxv4i1(<vscale x 4 x double> %a) {
+; CHECK-LABEL: fcvtzu_nxv4f64_to_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i1>
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzu_nxv8f16_to_nxv8i1(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fcvtzu_nxv8f16_to_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
+; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 8 x half> %a to <vscale x 8 x i1>
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzu_nxv8f32_to_nxv8i1(<vscale x 8 x float> %a) {
+; CHECK-LABEL: fcvtzu_nxv8f32_to_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 8 x float> %a to <vscale x 8 x i1>
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzu_nxv8f64_to_nxv8i1(<vscale x 8 x double> %a) {
+; CHECK-LABEL: fcvtzu_nxv8f64_to_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
+; CHECK-NEXT: cmpne p3.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s
+; CHECK-NEXT: uzp1 p0.s, p0.s, p3.s
+; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 8 x double> %a to <vscale x 8 x i1>
+ ret <vscale x 8 x i1> %res
+}
+
; NOTE: Using fcvtzs is safe as fptoui overflow is considered poison and a
; 64bit signed value encompasses the entire range of a 16bit unsigned value
define <vscale x 2 x i16> @fcvtzu_h_nxv2f16(<vscale x 2 x half> %a) {
More information about the llvm-commits
mailing list