[llvm] Map vector saturated converts to public intrinsics (PR #121483)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 2 07:27:43 PST 2025


https://github.com/JaydeepChauhan14 created https://github.com/llvm/llvm-project/pull/121483

None

>From 491755c243f06e12b9ee134334aeda3416839a76 Mon Sep 17 00:00:00 2001
From: Chauhan Jaydeep Ashwinbhai <chauhan.jaydeep.ashwinbhai at intel.com>
Date: Thu, 2 Jan 2025 23:22:45 +0800
Subject: [PATCH] Map vector saturated converts to public intrinsics

---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp       |  33 +++
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  65 ++++-
 llvm/lib/Target/X86/X86ISelLowering.h         |   4 +
 llvm/lib/Target/X86/X86InstrAVX10.td          |  56 ++++
 .../CodeGen/X86/avx10_2_512fptosi_satcvtds.ll | 122 +++++++++
 .../CodeGen/X86/avx10_2fptosi_satcvtds.ll     | 244 ++++++++++++++++++
 6 files changed, 522 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9b340a778b36ad..0dbae94d3f58e7 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5456,6 +5456,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     break;
   }
 
+  case X86ISD::FP_TO_SINT_SAT_CUSTOM:
+  case X86ISD::FP_TO_UINT_SAT_CUSTOM:
+    // Hand-select the 128-bit AVX10.2 saturating converts; without AVX10.2 the
+    // node is left for the code after the switch.
+    if (Subtarget->hasAVX10_2()) {
+      bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT_CUSTOM;
+      SDValue Op = Node->getOperand(0);
+      EVT VT = Node->getValueType(0);
+      EVT OpVT = Op.getValueType();
+      // Null until a supported type pair is matched.
+      MachineSDNode *MachineNode = nullptr;
+
+      // NOTE(review): a v4f32 source is paired with packed-double
+      // VCVTTPD2[U]DQS and a v2f64 source with packed-single VCVTTPS2[U]QQS;
+      // confirm these operand types against the lowering that builds the node.
+      if (VT == MVT::v4i32 && OpVT == MVT::v4f32)
+        MachineNode = CurDAG->getMachineNode(
+            IsSigned ? X86::VCVTTPD2DQSZ128rr : X86::VCVTTPD2UDQSZ128rr, dl, VT,
+            Op);
+      else if (VT == MVT::v2i64 && OpVT == MVT::v2f64)
+        MachineNode = CurDAG->getMachineNode(
+            IsSigned ? X86::VCVTTPS2QQSZ128rr : X86::VCVTTPS2UQQSZ128rr, dl, VT,
+            Op);
+
+      // Unhandled type pair: never replace the node with an unset pointer.
+      if (!MachineNode)
+        break;
+
+      ReplaceNode(Node, MachineNode);
+      return;
+    }
+    break;
+
   case X86ISD::ANDNP:
     if (tryVPTERNLOG(Node))
       return;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0514e93d6598b..3364043cda0563 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -341,8 +341,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
   }
   if (Subtarget.hasAVX10_2()) {
-    setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal);
-    setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal);
+    setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom);
+    setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom);
+    for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
+                   MVT::v4i64, MVT::v8i64}) {
+      setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal);
+      setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal);
+    }
     if (Subtarget.is64Bit()) {
       setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
       setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
@@ -2656,6 +2661,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                        ISD::UINT_TO_FP,
                        ISD::STRICT_SINT_TO_FP,
                        ISD::STRICT_UINT_TO_FP,
+                       ISD::FP_TO_SINT_SAT,
+                       ISD::FP_TO_UINT_SAT,
                        ISD::SETCC,
                        ISD::MUL,
                        ISD::XOR,
@@ -33665,6 +33672,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     }
     return;
   }
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT: {
+    // Replace a v2i32 saturating convert with a v4i32 custom node (AVX10.2).
+    if (!Subtarget.hasAVX10_2())
+      return;
+
+    EVT VT = N->getValueType(0);
+    SDValue Op = N->getOperand(0);
+    // Only v2f64 -> v2i32 is handled; everything else leaves the switch.
+    if (VT != MVT::v2i32 || Op.getValueType() != MVT::v2f64)
+      break;
+
+    // NOTE(review): the source is narrowed to v4f32 via X86ISD::VFPROUND before
+    // the saturating convert, so the convert no longer sees the original f64
+    // values, and the selector maps this v4f32 -> v4i32 node to
+    // VCVTTPD2[U]DQS. Confirm whether the v2f64 operand should be used directly.
+    SDValue V4f32 = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Op);
+    unsigned SatOpc = Opc == ISD::FP_TO_SINT_SAT
+                          ? X86ISD::FP_TO_SINT_SAT_CUSTOM
+                          : X86ISD::FP_TO_UINT_SAT_CUSTOM;
+    // Push the v4i32 custom node as the replacement result.
+    Results.push_back(DAG.getNode(SatOpc, dl, MVT::v4i32, V4f32));
+    return;
+  }
   case ISD::FP_TO_SINT:
   case ISD::STRICT_FP_TO_SINT:
   case ISD::FP_TO_UINT:
@@ -34645,6 +34676,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VPERMV3)
   NODE_NAME_CASE(VPERMI)
   NODE_NAME_CASE(VPTERNLOG)
+  NODE_NAME_CASE(FP_TO_SINT_SAT_CUSTOM)
+  NODE_NAME_CASE(FP_TO_UINT_SAT_CUSTOM)
   NODE_NAME_CASE(VFIXUPIMM)
   NODE_NAME_CASE(VFIXUPIMM_SAE)
   NODE_NAME_CASE(VFIXUPIMMS)
@@ -56202,6 +56235,32 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Combine v2f32 -> v2i64 FP_TO_[SU]INT_SAT for VCVTTPS2QQS/VCVTTPS2UQQS.
+// Returns an X86ISD::FP_TO_[SU]INT_SAT_CUSTOM node on the extended source, or
+// an empty SDValue without AVX10.2 or for any other type pair.
+static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
+                                     const X86Subtarget &Subtarget) {
+  if (!Subtarget.hasAVX10_2())
+    return SDValue();
+
+  SDValue Src = N->getOperand(0);
+  if (Src.getValueType() != MVT::v2f32 || N->getValueType(0) != MVT::v2i64)
+    return SDValue();
+
+  // NOTE(review): the source is widened to v2f64 here, yet the selector maps
+  // the v2f64 -> v2i64 custom node to the packed-single VCVTTPS2[U]QQS.
+  // Confirm which source element type the selected instruction is meant to
+  // consume.
+  SDLoc dl(N);
+  SDValue V2F64 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Src);
+
+  // Pick the signed or unsigned custom node from the incoming opcode.
+  unsigned SatOpc = N->getOpcode() == ISD::FP_TO_SINT_SAT
+                        ? X86ISD::FP_TO_SINT_SAT_CUSTOM
+                        : X86ISD::FP_TO_UINT_SAT_CUSTOM;
+  return DAG.getNode(SatOpc, dl, MVT::v2i64, V2F64);
+}
+
 static bool needCarryOrOverflowFlag(SDValue Flags) {
   assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
 
@@ -59315,6 +59374,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::INTRINSIC_WO_CHAIN:  return combineINTRINSIC_WO_CHAIN(N, DAG, DCI);
   case ISD::INTRINSIC_W_CHAIN:  return combineINTRINSIC_W_CHAIN(N, DAG, DCI);
   case ISD::INTRINSIC_VOID:  return combineINTRINSIC_VOID(N, DAG, DCI);
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
     // clang-format on
   }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 2b7a8eaf249d83..0c04cf122bddd9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -908,6 +908,10 @@ namespace llvm {
     // Load x87 FPU environment from memory.
     FLDENVm,
 
+    // Custom handling for FP_TO_xINT_SAT
+    FP_TO_SINT_SAT_CUSTOM,
+    FP_TO_UINT_SAT_CUSTOM,
+
     /// This instruction implements FP_TO_SINT with the
     /// integer destination in memory and a FP reg source.  This corresponds
     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 3bc64eda01a9ce..e373111fe6c008 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -831,6 +831,62 @@ let Predicates = [HasAVX10_2] in {
 // patterns have been disabled with null_frag.
 // Patterns VCVTTPD2DQSZ128
 
+// VCVTTPD2DQS
+def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
+          (VCVTTPD2DQSZ256rr VR256X:$src)>;
+def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
+          (VCVTTPD2DQSZrr VR512:$src)>;
+
+// VCVTTPD2QQS
+def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
+          (VCVTTPD2QQSZ128rr VR128X:$src)>;
+def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
+          (VCVTTPD2QQSZ256rr VR256X:$src)>;
+def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
+          (VCVTTPD2QQSZrr VR512:$src)>;
+
+// VCVTTPD2UDQS
+def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
+          (VCVTTPD2UDQSZ256rr VR256X:$src)>;
+def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
+          (VCVTTPD2UDQSZrr VR512:$src)>;
+
+// VCVTTPD2UQQS
+def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
+          (VCVTTPD2UQQSZ128rr VR128X:$src)>;
+def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
+          (VCVTTPD2UQQSZ256rr VR256X:$src)>;
+def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
+          (VCVTTPD2UQQSZrr VR512:$src)>;
+
+// VCVTTPS2DQS
+def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
+          (VCVTTPS2DQSZ128rr VR128X:$src)>;
+def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
+          (VCVTTPS2DQSZ256rr VR256X:$src)>;
+def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
+          (VCVTTPS2DQSZrr VR512:$src)>;
+
+// VCVTTPS2QQS
+def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
+          (VCVTTPS2QQSZ256rr VR128X:$src)>;
+def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
+          (VCVTTPS2QQSZrr VR256X:$src)>;
+
+// VCVTTPS2UDQS
+def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
+          (VCVTTPS2UDQSZ128rr VR128X:$src)>;
+def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
+          (VCVTTPS2UDQSZ256rr VR256X:$src)>;
+def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
+          (VCVTTPS2UDQSZrr VR512:$src)>;
+
+// VCVTTPS2UQQS
+def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
+          (VCVTTPS2UQQSZ256rr VR128X:$src)>;
+def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
+          (VCVTTPS2UQQSZrr VR256X:$src)>;
+
 def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
           (VCVTTPD2DQSZ128rr VR128X:$src)>;
 def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
diff --git a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
new file mode 100644
index 00000000000000..70465a28bad6a7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64
+
+; VCVTTPD2DQS
+define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_signed_v8i32_v8f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttpd2dqs %zmm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v8i32_v8f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttpd2dqs %zmm0, %ymm0
+; X64-NEXT:    retq
+  %x = call  <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f)
+  ret <8 x i32> %x
+}
+
+; VCVTTPD2QQS
+define <8 x i64> @test_signed_v8i64_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_signed_v8i64_v8f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttpd2qqs %zmm0, %zmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v8i64_v8f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttpd2qqs %zmm0, %zmm0
+; X64-NEXT:    retq
+  %x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> %f)
+  ret <8 x i64> %x
+}
+
+; VCVTTPD2UDQS
+define <8 x i32> @test_unsigned_v8i32_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i32_v8f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttpd2udqs %zmm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v8i32_v8f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttpd2udqs %zmm0, %ymm0
+; X64-NEXT:    retq
+  %x = call  <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> %f)
+ ret <8 x i32> %x
+}
+
+; VCVTTPD2UQQS
+define <8 x i64> @test_unsigned_v8i64_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i64_v8f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttpd2uqqs %zmm0, %zmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v8i64_v8f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttpd2uqqs %zmm0, %zmm0
+; X64-NEXT:    retq
+  %x = call  <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> %f)
+  ret <8 x i64> %x
+}
+
+; VCVTTPS2DQS
+define <16 x i32> @test_signed_v16i32_v16f32(<16 x float> %f) nounwind {
+; X86-LABEL: test_signed_v16i32_v16f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttps2dqs %zmm0, %zmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v16i32_v16f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttps2dqs %zmm0, %zmm0
+; X64-NEXT:    retq
+  %x = call  <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> %f)
+  ret <16 x i32> %x
+}
+
+; VCVTTPS2UDQS
+define <16 x i32> @test_unsigned_v16i32_v16f32(<16 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v16i32_v16f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttps2udqs %zmm0, %zmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v16i32_v16f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttps2udqs %zmm0, %zmm0
+; X64-NEXT:    retq
+  %x = call  <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> %f)
+  ret <16 x i32> %x
+}
+; VCVTTPS2QQS
+define <8 x i64> @test_signed_v8i64_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_signed_v8i64_v8f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttps2qqs %ymm0, %zmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v8i64_v8f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttps2qqs %ymm0, %zmm0
+; X64-NEXT:    retq
+  %x = call  <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %f)
+  ret <8 x i64> %x
+}
+
+; VCVTTPS2UQQS
+define <8 x i64> @test_unsigned_v8i64_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i64_v8f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttps2uqqs %ymm0, %zmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v8i64_v8f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttps2uqqs %ymm0, %zmm0
+; X64-NEXT:    retq
+  %x = call  <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %f)
+  ret <8 x i64> %x
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
index 494e4bc8e068e4..0c731a09f8dbd2 100644
--- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
+++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
@@ -112,3 +112,247 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
     %x = call i64 @llvm.fptosi.sat.i64.f64(double %f)
     ret i64 %x
 }
+
+; VCVTTPD2DQS
+define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %d) nounwind {
+; X86-LABEL: test_signed_v2i32_v2f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvtpd2ps %xmm0, %xmm0
+; X86-NEXT:    vcvttpd2dqs %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v2i32_v2f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtpd2ps %xmm0, %xmm0
+; X64-NEXT:    vcvttpd2dqs %xmm0, %xmm0
+; X64-NEXT:    retq
+  %x = call  <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %d)
+  ret <2 x i32> %x
+}
+
+define <4 x i32> @test_signed_v4i32_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_signed_v4i32_v4f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttpd2dqs %ymm0, %xmm0
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v4i32_v4f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttpd2dqs %ymm0, %xmm0
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+  %x = call  <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> %f)
+  ret <4 x i32> %x
+}
+
+; VCVTTPD2QQS
+define <2 x i64> @test_signed_v2i64_v2f64(<2 x double> %f) nounwind {
+; X86-LABEL: test_signed_v2i64_v2f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttpd2qqs %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v2i64_v2f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttpd2qqs %xmm0, %xmm0
+; X64-NEXT:    retq
+    %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %f)
+    ret <2 x i64> %x
+}
+
+define <4 x i64> @test_signed_v4i64_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_signed_v4i64_v4f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttpd2qqs %ymm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v4i64_v4f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttpd2qqs %ymm0, %ymm0
+; X64-NEXT:    retq
+  %x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> %f)
+  ret <4 x i64> %x
+}
+
+; VCVTTPD2UDQS
+define <2 x i32> @test_unsigned_v2i32_v2f64(<2 x double> %d) nounwind {
+; X86-LABEL: test_unsigned_v2i32_v2f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvtpd2ps %xmm0, %xmm0
+; X86-NEXT:    vcvttpd2udqs %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v2i32_v2f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtpd2ps %xmm0, %xmm0
+; X64-NEXT:    vcvttpd2udqs %xmm0, %xmm0
+; X64-NEXT:    retq
+  %x = call  <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %d)
+  ret <2 x i32> %x
+}
+
+define <4 x i32> @test_unsigned_v4i32_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i32_v4f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttpd2udqs %ymm0, %xmm0
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v4i32_v4f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttpd2udqs %ymm0, %xmm0
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+  %x = call  <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> %f)
+  ret <4 x i32> %x
+}
+
+; VCVTTPD2UQQS
+define <2 x i64> @test_unsigned_v2i64_v2f64(<2 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v2i64_v2f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttpd2uqqs %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v2i64_v2f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttpd2uqqs %xmm0, %xmm0
+; X64-NEXT:    retq
+  %x = call  <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %f)
+  ret <2 x i64> %x
+}
+
+define <4 x i64> @test_unsigned_v4i64_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i64_v4f64:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttpd2uqqs %ymm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v4i64_v4f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttpd2uqqs %ymm0, %ymm0
+; X64-NEXT:    retq
+  %x = call  <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> %f)
+  ret <4 x i64> %x
+}
+
+; VCVTTPS2DQS
+define <4 x i32> @test_signed_v4i32_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_signed_v4i32_v4f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttps2dqs %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v4i32_v4f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttps2dqs %xmm0, %xmm0
+; X64-NEXT:    retq
+  %x = call  <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f)
+  ret <4 x i32> %x
+}
+
+define <8 x i32> @test_signed_v8i32_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_signed_v8i32_v8f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttps2dqs %ymm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v8i32_v8f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttps2dqs %ymm0, %ymm0
+; X64-NEXT:    retq
+  %x = call  <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> %f)
+  ret <8 x i32> %x
+}
+
+; VCVTTPS2UDQS
+define <4 x i32> @test_unsigned_v4i32_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i32_v4f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttps2udqs %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v4i32_v4f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttps2udqs %xmm0, %xmm0
+; X64-NEXT:    retq
+  %x = call  <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f)
+  ret <4 x i32> %x
+}
+
+define <8 x i32> @test_unsigned_v8i32_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i32_v8f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttps2udqs %ymm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v8i32_v8f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttps2udqs %ymm0, %ymm0
+; X64-NEXT:    retq
+  %x = call  <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> %f)
+  ret <8 x i32> %x
+}
+
+; VCVTTPS2QQS
+define <2 x i64> @test_signed_v2i64_v2f32(<2 x float> %f) nounwind {
+; X86-LABEL: test_signed_v2i64_v2f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvtps2pd %xmm0, %xmm0
+; X86-NEXT:    vcvttps2qqs %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v2i64_v2f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtps2pd %xmm0, %xmm0
+; X64-NEXT:    vcvttps2qqs %xmm0, %xmm0
+; X64-NEXT:    retq
+  %x = call  <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %f)
+  ret <2 x i64> %x
+}
+
+define <4 x i64> @test_signed_v4i64_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_signed_v4i64_v4f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttps2qqs %xmm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_signed_v4i64_v4f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttps2qqs %xmm0, %ymm0
+; X64-NEXT:    retq
+  %x = call  <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> %f)
+  ret <4 x i64> %x
+}
+
+; VCVTTPS2UQQS
+define <2 x i64> @test_unsigned_v2i64_v2f32(<2 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v2i64_v2f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvtps2pd %xmm0, %xmm0
+; X86-NEXT:    vcvttps2uqqs %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v2i64_v2f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtps2pd %xmm0, %xmm0
+; X64-NEXT:    vcvttps2uqqs %xmm0, %xmm0
+; X64-NEXT:    retq
+  %x = call  <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> %f)
+  ret <2 x i64> %x
+}
+
+define <4 x i64> @test_unsigned_v4i64_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i64_v4f32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttps2uqqs %xmm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_unsigned_v4i64_v4f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttps2uqqs %xmm0, %ymm0
+; X64-NEXT:    retq
+  %x = call  <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> %f)
+  ret <4 x i64> %x
+}



More information about the llvm-commits mailing list