[llvm] [X86][AVX10.2] Map vector saturated converts to public intrinsics (PR #121483)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 4 01:53:09 PST 2025
https://github.com/JaydeepChauhan14 updated https://github.com/llvm/llvm-project/pull/121483
>From 491755c243f06e12b9ee134334aeda3416839a76 Mon Sep 17 00:00:00 2001
From: Chauhan Jaydeep Ashwinbhai <chauhan.jaydeep.ashwinbhai at intel.com>
Date: Thu, 2 Jan 2025 23:22:45 +0800
Subject: [PATCH 1/4] Map vector saturated converts to public intrinsics
---
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 33 +++
llvm/lib/Target/X86/X86ISelLowering.cpp | 65 ++++-
llvm/lib/Target/X86/X86ISelLowering.h | 4 +
llvm/lib/Target/X86/X86InstrAVX10.td | 56 ++++
.../CodeGen/X86/avx10_2_512fptosi_satcvtds.ll | 122 +++++++++
.../CodeGen/X86/avx10_2fptosi_satcvtds.ll | 244 ++++++++++++++++++
6 files changed, 522 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9b340a778b36ad..0dbae94d3f58e7 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5456,6 +5456,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
+ case X86ISD::FP_TO_SINT_SAT_CUSTOM:
+ case X86ISD::FP_TO_UINT_SAT_CUSTOM:
+ if (Subtarget->hasAVX10_2()) {
+ bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT_CUSTOM;
+ SDValue Op = Node->getOperand(0);
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Op.getValueType();
+ MachineSDNode *MachineNode;
+
+ if (VT == MVT::v4i32 && OpVT == MVT::v4f32) {
+ if (IsSigned)
+ MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl,
+ MVT::v4i32, Op);
+ else
+ MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl,
+ MVT::v4i32, Op);
+ }
+
+ if ((VT == MVT::v2i64 && OpVT == MVT::v2f64)) {
+ if (IsSigned)
+ MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl,
+ MVT::v2i64, Op);
+ else
+ MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl,
+ MVT::v2i64, Op);
+ }
+
+ SDValue NewNode = SDValue(MachineNode, 0);
+ ReplaceNode(Node, NewNode.getNode());
+ return;
+ }
+ break;
+
case X86ISD::ANDNP:
if (tryVPTERNLOG(Node))
return;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0514e93d6598b..3364043cda0563 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -341,8 +341,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasAVX10_2()) {
- setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom);
+ for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
+ MVT::v4i64, MVT::v8i64}) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal);
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal);
+ }
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
@@ -2656,6 +2661,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::UINT_TO_FP,
ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP,
+ ISD::FP_TO_SINT_SAT,
+ ISD::FP_TO_UINT_SAT,
ISD::SETCC,
ISD::MUL,
ISD::XOR,
@@ -33665,6 +33672,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: {
+ if (!Subtarget.hasAVX10_2())
+ return;
+
+ bool IsSigned = Opc == ISD::FP_TO_SINT_SAT;
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ SDValue V4I32;
+
+ if (VT == MVT::v2i32 && OpVT == MVT::v2f64) {
+ SDValue V4f32 = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Op);
+ if (IsSigned)
+ V4I32 =
+ DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32);
+ else
+ V4I32 =
+ DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32);
+ Results.push_back(V4I32);
+ return;
+ }
+ break;
+ }
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
@@ -34645,6 +34676,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VPERMV3)
NODE_NAME_CASE(VPERMI)
NODE_NAME_CASE(VPTERNLOG)
+ NODE_NAME_CASE(FP_TO_SINT_SAT_CUSTOM)
+ NODE_NAME_CASE(FP_TO_UINT_SAT_CUSTOM)
NODE_NAME_CASE(VFIXUPIMM)
NODE_NAME_CASE(VFIXUPIMM_SAE)
NODE_NAME_CASE(VFIXUPIMMS)
@@ -56202,6 +56235,32 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// AVX10.2: map v2f32 -> v2i64 FP_TO_xINT_SAT to VCVTTPS2QQS/VCVTTPS2UQQS
+static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX10_2())
+ return SDValue();
+
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DstVT = N->getValueType(0);
+ SDLoc dl(N);
+
+ if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) {
+ // Convert v2f32 to v2f64
+ SDValue V2F64 =
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, N->getOperand(0));
+
+ // Select the FP_TO_SINT_SAT_CUSTOM/FP_TO_UINT_SAT_CUSTOM node
+ if (IsSigned)
+ return DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64);
+ else
+ return DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64);
+ }
+
+ return SDValue();
+}
+
static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
@@ -59315,6 +59374,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
// clang-format on
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 2b7a8eaf249d83..0c04cf122bddd9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -908,6 +908,10 @@ namespace llvm {
// Load x87 FPU environment from memory.
FLDENVm,
+ // Custom handling for FP_TO_xINT_SAT
+ FP_TO_SINT_SAT_CUSTOM,
+ FP_TO_UINT_SAT_CUSTOM,
+
/// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 3bc64eda01a9ce..e373111fe6c008 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -831,6 +831,62 @@ let Predicates = [HasAVX10_2] in {
// patterns have been disabled with null_frag.
// Patterns VCVTTPD2DQSZ128
+// VCVTTPD2DQS
+def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
+ (VCVTTPD2DQSZ256rr VR256X:$src)>;
+def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
+ (VCVTTPD2DQSZrr VR512:$src)>;
+
+// VCVTTPD2QQS
+def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
+ (VCVTTPD2QQSZ128rr VR128X:$src)>;
+def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
+ (VCVTTPD2QQSZ256rr VR256X:$src)>;
+def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
+ (VCVTTPD2QQSZrr VR512:$src)>;
+
+// VCVTTPD2UDQS
+def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
+ (VCVTTPD2UDQSZ256rr VR256X:$src)>;
+def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
+ (VCVTTPD2UDQSZrr VR512:$src)>;
+
+// VCVTTPD2UQQS
+def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
+ (VCVTTPD2UQQSZ128rr VR128X:$src)>;
+def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
+ (VCVTTPD2UQQSZ256rr VR256X:$src)>;
+def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
+ (VCVTTPD2UQQSZrr VR512:$src)>;
+
+// VCVTTPS2DQS
+def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
+ (VCVTTPS2DQSZ128rr VR128X:$src)>;
+def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
+ (VCVTTPS2DQSZ256rr VR256X:$src)>;
+def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
+ (VCVTTPS2DQSZrr VR512:$src)>;
+
+// VCVTTPS2QQS
+def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
+ (VCVTTPS2QQSZ256rr VR128X:$src)>;
+def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
+ (VCVTTPS2QQSZrr VR256X:$src)>;
+
+// VCVTTPS2UDQS
+def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
+ (VCVTTPS2UDQSZ128rr VR128X:$src)>;
+def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
+ (VCVTTPS2UDQSZ256rr VR256X:$src)>;
+def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
+ (VCVTTPS2UDQSZrr VR512:$src)>;
+
+// VCVTTPS2UQQS
+def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
+ (VCVTTPS2UQQSZ256rr VR128X:$src)>;
+def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
+ (VCVTTPS2UQQSZrr VR256X:$src)>;
+
def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
(VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
diff --git a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
new file mode 100644
index 00000000000000..70465a28bad6a7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64
+
+; VCVTTPD2DQS
+define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_signed_v8i32_v8f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v8i32_v8f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2dqs %zmm0, %ymm0
+; X64-NEXT: retq
+ %x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f)
+ ret <8 x i32> %x
+}
+
+; VCVTTPD2QQS
+define <8 x i64> @test_signed_v8i64_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_signed_v8i64_v8f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v8i64_v8f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2qqs %zmm0, %zmm0
+; X64-NEXT: retq
+ %x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> %f)
+ ret <8 x i64> %x
+}
+
+; VCVTTPD2UDQS
+define <8 x i32> @test_unsigned_v8i32_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i32_v8f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v8i32_v8f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2udqs %zmm0, %ymm0
+; X64-NEXT: retq
+ %x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> %f)
+ ret <8 x i32> %x
+}
+
+; VCVTTPD2UQQS
+define <8 x i64> @test_unsigned_v8i64_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i64_v8f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v8i64_v8f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2uqqs %zmm0, %zmm0
+; X64-NEXT: retq
+ %x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> %f)
+ ret <8 x i64> %x
+}
+
+; VCVTTPS2DQS
+define <16 x i32> @test_signed_v16i32_v16f32(<16 x float> %f) nounwind {
+; X86-LABEL: test_signed_v16i32_v16f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2dqs %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v16i32_v16f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2dqs %zmm0, %zmm0
+; X64-NEXT: retq
+ %x = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> %f)
+ ret <16 x i32> %x
+}
+
+; VCVTTPS2UDQS
+define <16 x i32> @test_unsigned_v16i32_v16f32(<16 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v16i32_v16f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2udqs %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v16i32_v16f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2udqs %zmm0, %zmm0
+; X64-NEXT: retq
+ %x = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> %f)
+ ret <16 x i32> %x
+}
+; VCVTTPS2QQS
+define <8 x i64> @test_signed_v8i64_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_signed_v8i64_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2qqs %ymm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v8i64_v8f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2qqs %ymm0, %zmm0
+; X64-NEXT: retq
+ %x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %f)
+ ret <8 x i64> %x
+}
+
+; VCVTTPS2UQQS
+define <8 x i64> @test_unsigned_v8i64_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i64_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v8i64_v8f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2uqqs %ymm0, %zmm0
+; X64-NEXT: retq
+ %x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %f)
+ ret <8 x i64> %x
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
index 494e4bc8e068e4..0c731a09f8dbd2 100644
--- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
+++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
@@ -112,3 +112,247 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
%x = call i64 @llvm.fptosi.sat.i64.f64(double %f)
ret i64 %x
}
+
+; VCVTTPD2DQS
+define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %d) nounwind {
+; X86-LABEL: test_signed_v2i32_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvtpd2ps %xmm0, %xmm0
+; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v2i32_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvtpd2ps %xmm0, %xmm0
+; X64-NEXT: vcvttpd2dqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %d)
+ ret <2 x i32> %x
+}
+
+define <4 x i32> @test_signed_v4i32_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_signed_v4i32_v4f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2dqs %ymm0, %xmm0
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v4i32_v4f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2dqs %ymm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+ %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> %f)
+ ret <4 x i32> %x
+}
+
+; VCVTTPD2QQS
+define <2 x i64> @test_signed_v2i64_v2f64(<2 x double> %f) nounwind {
+; X86-LABEL: test_signed_v2i64_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2qqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v2i64_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2qqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %f)
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @test_signed_v4i64_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_signed_v4i64_v4f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2qqs %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v4i64_v4f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2qqs %ymm0, %ymm0
+; X64-NEXT: retq
+ %x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> %f)
+ ret <4 x i64> %x
+}
+
+; VCVTTPD2UDQS
+define <2 x i32> @test_unsigned_v2i32_v2f64(<2 x double> %d) nounwind {
+; X86-LABEL: test_unsigned_v2i32_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvtpd2ps %xmm0, %xmm0
+; X86-NEXT: vcvttpd2udqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v2i32_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvtpd2ps %xmm0, %xmm0
+; X64-NEXT: vcvttpd2udqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %d)
+ ret <2 x i32> %x
+}
+
+define <4 x i32> @test_unsigned_v4i32_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i32_v4f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2udqs %ymm0, %xmm0
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v4i32_v4f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2udqs %ymm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+ %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> %f)
+ ret <4 x i32> %x
+}
+
+; VCVTTPD2UQQS
+define <2 x i64> @test_unsigned_v2i64_v2f64(<2 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v2i64_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v2i64_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2uqqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %f)
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @test_unsigned_v4i64_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i64_v4f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v4i64_v4f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2uqqs %ymm0, %ymm0
+; X64-NEXT: retq
+ %x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> %f)
+ ret <4 x i64> %x
+}
+
+; VCVTTPS2DQS
+define <4 x i32> @test_signed_v4i32_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_signed_v4i32_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2dqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v4i32_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2dqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f)
+ ret <4 x i32> %x
+}
+
+define <8 x i32> @test_signed_v8i32_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_signed_v8i32_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2dqs %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v8i32_v8f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2dqs %ymm0, %ymm0
+; X64-NEXT: retq
+ %x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> %f)
+ ret <8 x i32> %x
+}
+
+; VCVTTPS2UDQS
+define <4 x i32> @test_unsigned_v4i32_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i32_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2udqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v4i32_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2udqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f)
+ ret <4 x i32> %x
+}
+
+define <8 x i32> @test_unsigned_v8i32_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i32_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2udqs %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v8i32_v8f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2udqs %ymm0, %ymm0
+; X64-NEXT: retq
+ %x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> %f)
+ ret <8 x i32> %x
+}
+
+; VCVTTPS2QQS
+define <2 x i64> @test_signed_v2i64_v2f32(<2 x float> %f) nounwind {
+; X86-LABEL: test_signed_v2i64_v2f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvtps2pd %xmm0, %xmm0
+; X86-NEXT: vcvttps2qqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v2i64_v2f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvtps2pd %xmm0, %xmm0
+; X64-NEXT: vcvttps2qqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %f)
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @test_signed_v4i64_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_signed_v4i64_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2qqs %xmm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v4i64_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2qqs %xmm0, %ymm0
+; X64-NEXT: retq
+ %x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> %f)
+ ret <4 x i64> %x
+}
+
+; VCVTTPS2UQQS
+define <2 x i64> @test_unsigned_v2i64_v2f32(<2 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v2i64_v2f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvtps2pd %xmm0, %xmm0
+; X86-NEXT: vcvttps2uqqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v2i64_v2f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvtps2pd %xmm0, %xmm0
+; X64-NEXT: vcvttps2uqqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> %f)
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @test_unsigned_v4i64_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i64_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2uqqs %xmm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v4i64_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2uqqs %xmm0, %ymm0
+; X64-NEXT: retq
+ %x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> %f)
+ ret <4 x i64> %x
+}
>From a51c29dde73000309155a3ecb2500d8488b300ab Mon Sep 17 00:00:00 2001
From: Chauhan Jaydeep Ashwinbhai <chauhan.jaydeep.ashwinbhai at intel.com>
Date: Fri, 3 Jan 2025 16:19:12 +0800
Subject: [PATCH 2/4] Addressed the review comments1
---
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 58 +++--
llvm/lib/Target/X86/X86ISelLowering.cpp | 25 +-
llvm/lib/Target/X86/X86ISelLowering.h | 4 +-
.../CodeGen/X86/avx10_2_512fptosi_satcvtds.ll | 111 +++------
.../CodeGen/X86/avx10_2fptosi_satcvtds.ll | 230 ++++++------------
5 files changed, 152 insertions(+), 276 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 0dbae94d3f58e7..5e736a9bbb7ac3 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5456,39 +5456,37 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
- case X86ISD::FP_TO_SINT_SAT_CUSTOM:
- case X86ISD::FP_TO_UINT_SAT_CUSTOM:
- if (Subtarget->hasAVX10_2()) {
- bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT_CUSTOM;
- SDValue Op = Node->getOperand(0);
- EVT VT = Node->getValueType(0);
- EVT OpVT = Op.getValueType();
- MachineSDNode *MachineNode;
-
- if (VT == MVT::v4i32 && OpVT == MVT::v4f32) {
- if (IsSigned)
- MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl,
- MVT::v4i32, Op);
- else
- MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl,
- MVT::v4i32, Op);
- }
-
- if ((VT == MVT::v2i64 && OpVT == MVT::v2f64)) {
- if (IsSigned)
- MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl,
- MVT::v2i64, Op);
- else
- MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl,
- MVT::v2i64, Op);
- }
+ case X86ISD::FP_TO_SINT_SAT:
+ case X86ISD::FP_TO_UINT_SAT: {
+ assert(Subtarget->hasAVX10_2() && "Unsupported node");
+ bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT;
+ SDValue Op = Node->getOperand(0);
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Op.getValueType();
+ MachineSDNode *MachineNode;
+
+ if (VT == MVT::v4i32 && OpVT == MVT::v4f32) {
+ if (IsSigned)
+ MachineNode =
+ CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl, MVT::v4i32, Op);
+ else
+ MachineNode =
+ CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl, MVT::v4i32, Op);
+ }
- SDValue NewNode = SDValue(MachineNode, 0);
- ReplaceNode(Node, NewNode.getNode());
- return;
+ if ((VT == MVT::v2i64 && OpVT == MVT::v2f64)) {
+ if (IsSigned)
+ MachineNode =
+ CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl, MVT::v2i64, Op);
+ else
+ MachineNode =
+ CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl, MVT::v2i64, Op);
}
- break;
+ SDValue NewNode = SDValue(MachineNode, 0);
+ ReplaceNode(Node, NewNode.getNode());
+ return;
+ }
case X86ISD::ANDNP:
if (tryVPTERNLOG(Node))
return;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3364043cda0563..a009706daede45 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -344,10 +344,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom);
for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
- MVT::v4i64, MVT::v8i64}) {
+ MVT::v4i64}) {
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal);
}
+ if (Subtarget.hasAVX10_2_512()) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v8i64, Legal);
+ }
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
@@ -33686,11 +33690,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (VT == MVT::v2i32 && OpVT == MVT::v2f64) {
SDValue V4f32 = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Op);
if (IsSigned)
- V4I32 =
- DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32);
+ V4I32 = DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v4i32, V4f32);
else
- V4I32 =
- DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32);
+ V4I32 = DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v4i32, V4f32);
Results.push_back(V4I32);
return;
}
@@ -34676,8 +34678,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VPERMV3)
NODE_NAME_CASE(VPERMI)
NODE_NAME_CASE(VPTERNLOG)
- NODE_NAME_CASE(FP_TO_SINT_SAT_CUSTOM)
- NODE_NAME_CASE(FP_TO_UINT_SAT_CUSTOM)
+ NODE_NAME_CASE(FP_TO_SINT_SAT)
+ NODE_NAME_CASE(FP_TO_UINT_SAT)
NODE_NAME_CASE(VFIXUPIMM)
NODE_NAME_CASE(VFIXUPIMM_SAE)
NODE_NAME_CASE(VFIXUPIMMS)
@@ -56251,13 +56253,12 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
SDValue V2F64 =
DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, N->getOperand(0));
- // Select the FP_TO_SINT_SAT_CUSTOM/FP_TO_UINT_SAT_CUSTOM node
+ // Select the FP_TO_SINT_SAT/FP_TO_UINT_SAT node
if (IsSigned)
- return DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64);
- else
- return DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64);
- }
+ return DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v2i64, V2F64);
+ return DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v2i64, V2F64);
+ }
return SDValue();
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 0c04cf122bddd9..eaedaa0b88d22c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -909,8 +909,8 @@ namespace llvm {
FLDENVm,
// Custom handling for FP_TO_xINT_SAT
- FP_TO_SINT_SAT_CUSTOM,
- FP_TO_UINT_SAT_CUSTOM,
+ FP_TO_SINT_SAT,
+ FP_TO_UINT_SAT,
/// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
diff --git a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
index 70465a28bad6a7..d7ad7b048c6d69 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
@@ -1,122 +1,85 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
; VCVTTPD2DQS
define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind {
-; X86-LABEL: test_signed_v8i32_v8f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v8i32_v8f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvttpd2dqs %zmm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v8i32_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2dqs %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f)
ret <8 x i32> %x
}
; VCVTTPD2QQS
define <8 x i64> @test_signed_v8i64_v8f64(<8 x double> %f) nounwind {
-; X86-LABEL: test_signed_v8i64_v8f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v8i64_v8f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvttpd2qqs %zmm0, %zmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v8i64_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2qqs %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> %f)
ret <8 x i64> %x
}
; VCVTTPD2UDQS
define <8 x i32> @test_unsigned_v8i32_v8f64(<8 x double> %f) nounwind {
-; X86-LABEL: test_unsigned_v8i32_v8f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v8i32_v8f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvttpd2udqs %zmm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v8i32_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2udqs %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> %f)
ret <8 x i32> %x
}
; VCVTTPD2UQQS
define <8 x i64> @test_unsigned_v8i64_v8f64(<8 x double> %f) nounwind {
-; X86-LABEL: test_unsigned_v8i64_v8f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v8i64_v8f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvttpd2uqqs %zmm0, %zmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v8i64_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2uqqs %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> %f)
ret <8 x i64> %x
}
; VCVTTPS2DQS
define <16 x i32> @test_signed_v16i32_v16f32(<16 x float> %f) nounwind {
-; X86-LABEL: test_signed_v16i32_v16f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvttps2dqs %zmm0, %zmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v16i32_v16f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvttps2dqs %zmm0, %zmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v16i32_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2dqs %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> %f)
ret <16 x i32> %x
}
; VCVTTPS2UDQS
define <16 x i32> @test_unsigned_v16i32_v16f32(<16 x float> %f) nounwind {
-; X86-LABEL: test_unsigned_v16i32_v16f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvttps2udqs %zmm0, %zmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v16i32_v16f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvttps2udqs %zmm0, %zmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v16i32_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2udqs %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> %f)
ret <16 x i32> %x
}
; VCVTTPS2QQS
define <8 x i64> @test_signed_v8i64_v8f32(<8 x float> %f) nounwind {
-; X86-LABEL: test_signed_v8i64_v8f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvttps2qqs %ymm0, %zmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v8i64_v8f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvttps2qqs %ymm0, %zmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v8i64_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2qqs %ymm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %f)
ret <8 x i64> %x
}
; VCVTTPS2UQQS
define <8 x i64> @test_unsigned_v8i64_v8f32(<8 x float> %f) nounwind {
-; X86-LABEL: test_unsigned_v8i64_v8f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v8i64_v8f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvttps2uqqs %ymm0, %zmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v8i64_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2uqqs %ymm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %f)
ret <8 x i64> %x
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X64: {{.*}}
+; X86: {{.*}}
diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
index 0c731a09f8dbd2..a975a239170649 100644
--- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
+++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
;
; 32-bit float to signed integer
@@ -115,244 +115,158 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
; VCVTTPD2DQS
define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %d) nounwind {
-; X86-LABEL: test_signed_v2i32_v2f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvtpd2ps %xmm0, %xmm0
-; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v2i32_v2f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvtpd2ps %xmm0, %xmm0
-; X64-NEXT: vcvttpd2dqs %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v2i32_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
+; CHECK-NEXT: vcvttpd2dqs %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %d)
ret <2 x i32> %x
}
define <4 x i32> @test_signed_v4i32_v4f64(<4 x double> %f) nounwind {
-; X86-LABEL: test_signed_v4i32_v4f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvttpd2dqs %ymm0, %xmm0
-; X86-NEXT: vzeroupper
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v4i32_v4f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvttpd2dqs %ymm0, %xmm0
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v4i32_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2dqs %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> %f)
ret <4 x i32> %x
}
; VCVTTPD2QQS
define <2 x i64> @test_signed_v2i64_v2f64(<2 x double> %f) nounwind {
-; X86-LABEL: test_signed_v2i64_v2f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvttpd2qqs %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v2i64_v2f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvttpd2qqs %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v2i64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2qqs %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %f)
ret <2 x i64> %x
}
define <4 x i64> @test_signed_v4i64_v4f64(<4 x double> %f) nounwind {
-; X86-LABEL: test_signed_v4i64_v4f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvttpd2qqs %ymm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v4i64_v4f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvttpd2qqs %ymm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v4i64_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2qqs %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> %f)
ret <4 x i64> %x
}
; VCVTTPD2UDQS
define <2 x i32> @test_unsigned_v2i32_v2f64(<2 x double> %d) nounwind {
-; X86-LABEL: test_unsigned_v2i32_v2f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvtpd2ps %xmm0, %xmm0
-; X86-NEXT: vcvttpd2udqs %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v2i32_v2f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvtpd2ps %xmm0, %xmm0
-; X64-NEXT: vcvttpd2udqs %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v2i32_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
+; CHECK-NEXT: vcvttpd2udqs %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %d)
ret <2 x i32> %x
}
define <4 x i32> @test_unsigned_v4i32_v4f64(<4 x double> %f) nounwind {
-; X86-LABEL: test_unsigned_v4i32_v4f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvttpd2udqs %ymm0, %xmm0
-; X86-NEXT: vzeroupper
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v4i32_v4f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvttpd2udqs %ymm0, %xmm0
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v4i32_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2udqs %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> %f)
ret <4 x i32> %x
}
; VCVTTPD2UQQS
define <2 x i64> @test_unsigned_v2i64_v2f64(<2 x double> %f) nounwind {
-; X86-LABEL: test_unsigned_v2i64_v2f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v2i64_v2f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvttpd2uqqs %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v2i64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2uqqs %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %f)
ret <2 x i64> %x
}
define <4 x i64> @test_unsigned_v4i64_v4f64(<4 x double> %f) nounwind {
-; X86-LABEL: test_unsigned_v4i64_v4f64:
-; X86: # %bb.0:
-; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v4i64_v4f64:
-; X64: # %bb.0:
-; X64-NEXT: vcvttpd2uqqs %ymm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v4i64_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2uqqs %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> %f)
ret <4 x i64> %x
}
; VCVTTPS2DQS
define <4 x i32> @test_signed_v4i32_v4f32(<4 x float> %f) nounwind {
-; X86-LABEL: test_signed_v4i32_v4f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvttps2dqs %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v4i32_v4f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvttps2dqs %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v4i32_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2dqs %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f)
ret <4 x i32> %x
}
define <8 x i32> @test_signed_v8i32_v8f32(<8 x float> %f) nounwind {
-; X86-LABEL: test_signed_v8i32_v8f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvttps2dqs %ymm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v8i32_v8f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvttps2dqs %ymm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v8i32_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2dqs %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> %f)
ret <8 x i32> %x
}
; VCVTTPS2UDQS
define <4 x i32> @test_unsigned_v4i32_v4f32(<4 x float> %f) nounwind {
-; X86-LABEL: test_unsigned_v4i32_v4f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvttps2udqs %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v4i32_v4f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvttps2udqs %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v4i32_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2udqs %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f)
ret <4 x i32> %x
}
define <8 x i32> @test_unsigned_v8i32_v8f32(<8 x float> %f) nounwind {
-; X86-LABEL: test_unsigned_v8i32_v8f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvttps2udqs %ymm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v8i32_v8f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvttps2udqs %ymm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v8i32_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2udqs %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> %f)
ret <8 x i32> %x
}
; VCVTTPS2QQS
define <2 x i64> @test_signed_v2i64_v2f32(<2 x float> %f) nounwind {
-; X86-LABEL: test_signed_v2i64_v2f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvtps2pd %xmm0, %xmm0
-; X86-NEXT: vcvttps2qqs %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v2i64_v2f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvtps2pd %xmm0, %xmm0
-; X64-NEXT: vcvttps2qqs %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v2i64_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
+; CHECK-NEXT: vcvttps2qqs %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %f)
ret <2 x i64> %x
}
define <4 x i64> @test_signed_v4i64_v4f32(<4 x float> %f) nounwind {
-; X86-LABEL: test_signed_v4i64_v4f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvttps2qqs %xmm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_signed_v4i64_v4f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvttps2qqs %xmm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_signed_v4i64_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2qqs %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> %f)
ret <4 x i64> %x
}
; VCVTTPS2UQQS
define <2 x i64> @test_unsigned_v2i64_v2f32(<2 x float> %f) nounwind {
-; X86-LABEL: test_unsigned_v2i64_v2f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvtps2pd %xmm0, %xmm0
-; X86-NEXT: vcvttps2uqqs %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v2i64_v2f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvtps2pd %xmm0, %xmm0
-; X64-NEXT: vcvttps2uqqs %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v2i64_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
+; CHECK-NEXT: vcvttps2uqqs %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> %f)
ret <2 x i64> %x
}
define <4 x i64> @test_unsigned_v4i64_v4f32(<4 x float> %f) nounwind {
-; X86-LABEL: test_unsigned_v4i64_v4f32:
-; X86: # %bb.0:
-; X86-NEXT: vcvttps2uqqs %xmm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_unsigned_v4i64_v4f32:
-; X64: # %bb.0:
-; X64-NEXT: vcvttps2uqqs %xmm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_unsigned_v4i64_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2uqqs %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> %f)
ret <4 x i64> %x
}
>From 94234cbb9e13aa2628f1ee2dd1f15cf74297d750 Mon Sep 17 00:00:00 2001
From: Chauhan Jaydeep Ashwinbhai <chauhan.jaydeep.ashwinbhai at intel.com>
Date: Fri, 3 Jan 2025 20:52:50 +0800
Subject: [PATCH 3/4] Addressed the review comments2
---
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 31 --------------------
llvm/lib/Target/X86/X86InstrAVX10.td | 8 +++++
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 7 +++++
3 files changed, 15 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5e736a9bbb7ac3..9b340a778b36ad 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5456,37 +5456,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
- case X86ISD::FP_TO_SINT_SAT:
- case X86ISD::FP_TO_UINT_SAT: {
- assert(Subtarget->hasAVX10_2() && "Unsupported node");
- bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT;
- SDValue Op = Node->getOperand(0);
- EVT VT = Node->getValueType(0);
- EVT OpVT = Op.getValueType();
- MachineSDNode *MachineNode;
-
- if (VT == MVT::v4i32 && OpVT == MVT::v4f32) {
- if (IsSigned)
- MachineNode =
- CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl, MVT::v4i32, Op);
- else
- MachineNode =
- CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl, MVT::v4i32, Op);
- }
-
- if ((VT == MVT::v2i64 && OpVT == MVT::v2f64)) {
- if (IsSigned)
- MachineNode =
- CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl, MVT::v2i64, Op);
- else
- MachineNode =
- CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl, MVT::v2i64, Op);
- }
-
- SDValue NewNode = SDValue(MachineNode, 0);
- ReplaceNode(Node, NewNode.getNode());
- return;
- }
case X86ISD::ANDNP:
if (tryVPTERNLOG(Node))
return;
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 5b8806593fbb8c..91ba80f4ea6979 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -835,6 +835,8 @@ let Predicates = [HasAVX10_2] in {
// Patterns VCVTTPD2DQSZ128
// VCVTTPD2DQS
+def : Pat<(v4i32(X86fp2sisat(v4f32 VR128X:$src))),
+ (VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
(VCVTTPD2DQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
@@ -849,6 +851,8 @@ def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
(VCVTTPD2QQSZrr VR512:$src)>;
// VCVTTPD2UDQS
+def : Pat<(v4i32(X86fp2uisat(v4f32 VR128X:$src))),
+ (VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
(VCVTTPD2UDQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
@@ -871,6 +875,8 @@ def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
(VCVTTPS2DQSZrr VR512:$src)>;
// VCVTTPS2QQS
+def : Pat<(v2i64(X86fp2sisat(v2f64 VR128X:$src))),
+ (VCVTTPS2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
(VCVTTPS2QQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
@@ -885,6 +891,8 @@ def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
(VCVTTPS2UDQSZrr VR512:$src)>;
// VCVTTPS2UQQS
+def : Pat<(v2i64(X86fp2uisat(v2f64 VR128X:$src))),
+ (VCVTTPS2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
(VCVTTPS2UQQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index f6231b78f4c2e8..af0267a7d32c3a 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -390,6 +390,13 @@ def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>,
SDTCisFP<0>, SDTCisVT<4, i32>]>;
+def SDTFPToxIntSatOp
+ : SDTypeProfile<1,
+ 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisFP<1>]>;
+
+def X86fp2sisat : SDNode<"X86ISD::FP_TO_SINT_SAT", SDTFPToxIntSatOp>;
+def X86fp2uisat : SDNode<"X86ISD::FP_TO_UINT_SAT", SDTFPToxIntSatOp>;
+
def X86PAlignr : SDNode<"X86ISD::PALIGNR",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i8>,
SDTCisSameAs<0,1>,
>From 2a5149ba1ce7c7b6ed14b9bc22335488a6f6a5d1 Mon Sep 17 00:00:00 2001
From: Chauhan Jaydeep Ashwinbhai <chauhan.jaydeep.ashwinbhai at intel.com>
Date: Sat, 4 Jan 2025 17:51:54 +0800
Subject: [PATCH 4/4] Addressed the review comments3
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 22 ++++++++++---------
llvm/lib/Target/X86/X86InstrAVX10.td | 8 +++----
.../CodeGen/X86/avx10_2fptosi_satcvtds.ll | 4 ----
3 files changed, 16 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a009706daede45..267d8701824de7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33685,15 +33685,14 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
EVT OpVT = Op.getValueType();
- SDValue V4I32;
+ SDValue Res;
if (VT == MVT::v2i32 && OpVT == MVT::v2f64) {
- SDValue V4f32 = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Op);
if (IsSigned)
- V4I32 = DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v4i32, V4f32);
+ Res = DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v4i32, Op);
else
- V4I32 = DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v4i32, V4f32);
- Results.push_back(V4I32);
+ Res = DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v4i32, Op);
+ Results.push_back(Res);
return;
}
break;
@@ -56249,15 +56248,18 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
SDLoc dl(N);
if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) {
- // Convert v2f32 to v2f64
- SDValue V2F64 =
- DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, N->getOperand(0));
+ // Create an undefined value of type v2f32
+ SDValue UndefV2F32Value = DAG.getUNDEF(MVT::v2f32);
+
+ // Concatenate the original v2f32 input and undef v2f32 to create v4f32
+ SDValue NewSrc = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
+ N->getOperand(0), UndefV2F32Value);
// Select the FP_TO_SINT_SAT/FP_TO_UINT_SAT node
if (IsSigned)
- return DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v2i64, V2F64);
+ return DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v2i64, NewSrc);
- return DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v2i64, V2F64);
+ return DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v2i64, NewSrc);
}
return SDValue();
}
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 91ba80f4ea6979..127016184bc17b 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -835,7 +835,7 @@ let Predicates = [HasAVX10_2] in {
// Patterns VCVTTPD2DQSZ128
// VCVTTPD2DQS
-def : Pat<(v4i32(X86fp2sisat(v4f32 VR128X:$src))),
+def : Pat<(v4i32(X86fp2sisat(v2f64 VR128X:$src))),
(VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
(VCVTTPD2DQSZ256rr VR256X:$src)>;
@@ -851,7 +851,7 @@ def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
(VCVTTPD2QQSZrr VR512:$src)>;
// VCVTTPD2UDQS
-def : Pat<(v4i32(X86fp2uisat(v4f32 VR128X:$src))),
+def : Pat<(v4i32(X86fp2uisat(v2f64 VR128X:$src))),
(VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
(VCVTTPD2UDQSZ256rr VR256X:$src)>;
@@ -875,7 +875,7 @@ def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
(VCVTTPS2DQSZrr VR512:$src)>;
// VCVTTPS2QQS
-def : Pat<(v2i64(X86fp2sisat(v2f64 VR128X:$src))),
+def : Pat<(v2i64(X86fp2sisat(v4f32 VR128X:$src))),
(VCVTTPS2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
(VCVTTPS2QQSZ256rr VR128X:$src)>;
@@ -891,7 +891,7 @@ def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
(VCVTTPS2UDQSZrr VR512:$src)>;
// VCVTTPS2UQQS
-def : Pat<(v2i64(X86fp2uisat(v2f64 VR128X:$src))),
+def : Pat<(v2i64(X86fp2uisat(v4f32 VR128X:$src))),
(VCVTTPS2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
(VCVTTPS2UQQSZ256rr VR128X:$src)>;
diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
index a975a239170649..a2f167e94cc23f 100644
--- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
+++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
@@ -117,7 +117,6 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %d) nounwind {
; CHECK-LABEL: test_signed_v2i32_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: vcvttpd2dqs %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %d)
@@ -157,7 +156,6 @@ define <4 x i64> @test_signed_v4i64_v4f64(<4 x double> %f) nounwind {
define <2 x i32> @test_unsigned_v2i32_v2f64(<2 x double> %d) nounwind {
; CHECK-LABEL: test_unsigned_v2i32_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: vcvttpd2udqs %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %d)
@@ -235,7 +233,6 @@ define <8 x i32> @test_unsigned_v8i32_v8f32(<8 x float> %f) nounwind {
define <2 x i64> @test_signed_v2i64_v2f32(<2 x float> %f) nounwind {
; CHECK-LABEL: test_signed_v2i64_v2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT: vcvttps2qqs %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %f)
@@ -255,7 +252,6 @@ define <4 x i64> @test_signed_v4i64_v4f32(<4 x float> %f) nounwind {
define <2 x i64> @test_unsigned_v2i64_v2f32(<2 x float> %f) nounwind {
; CHECK-LABEL: test_unsigned_v2i64_v2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT: vcvttps2uqqs %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> %f)
More information about the llvm-commits mailing list