[llvm] b1d8576 - This adds constrained intrinsics for the signed and unsigned conversions
Kevin P. Neal via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 17 07:07:05 PST 2019
Author: Kevin P. Neal
Date: 2019-12-17T10:06:51-05:00
New Revision: b1d8576b0a9fa1f6a1173c0b5c2f379389e01e3f
URL: https://github.com/llvm/llvm-project/commit/b1d8576b0a9fa1f6a1173c0b5c2f379389e01e3f
DIFF: https://github.com/llvm/llvm-project/commit/b1d8576b0a9fa1f6a1173c0b5c2f379389e01e3f.diff
LOG: This adds constrained intrinsics for the signed and unsigned conversions
of integers to floating point.
This includes some of Craig Topper's changes for promotion support from
D71130.
Differential Revision: https://reviews.llvm.org/D69275
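For illustration only (these calls mirror the tests added below and are not part
of the patch text itself), the new intrinsics are used like the other constrained
conversions, with the rounding mode and exception behavior passed as metadata and
the call marked with the strictfp attribute group (#0 in the tests):

    %d = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
    %u = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %y,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0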
Added:
Modified:
llvm/docs/LangRef.rst
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/include/llvm/CodeGen/SelectionDAG.h
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/include/llvm/IR/ConstrainedOps.def
llvm/include/llvm/IR/Intrinsics.td
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/IR/Verifier.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/fp-intrinsics.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
llvm/test/Feature/fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2ef0cd7986ee..5bc07cae65f2 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15667,6 +15667,78 @@ Semantics:
The result produced is a signed integer converted from the floating
point operand. The value is truncated, so it is rounded towards zero.
+'``llvm.experimental.constrained.uitofp``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare <ty2>
+ @llvm.experimental.constrained.uitofp(<type> <value>,
+ metadata <rounding mode>,
+ metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.uitofp``' intrinsic converts an
+unsigned integer ``value`` to a floating-point value of type ``ty2``.
+
+Arguments:
+""""""""""
+
+The first argument to the '``llvm.experimental.constrained.uitofp``'
+intrinsic must be an :ref:`integer <t_integer>` or :ref:`vector
+<t_vector>` of integer values.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+The result produced is a floating-point value converted from the input
+integer operand. An inexact floating-point exception will be raised if
+rounding is required.
+
+'``llvm.experimental.constrained.sitofp``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare <ty2>
+ @llvm.experimental.constrained.sitofp(<type> <value>,
+ metadata <rounding mode>,
+ metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.sitofp``' intrinsic converts a
+signed integer ``value`` to a floating-point value of type ``ty2``.
+
+Arguments:
+""""""""""
+
+The first argument to the '``llvm.experimental.constrained.sitofp``'
+intrinsic must be an :ref:`integer <t_integer>` or :ref:`vector
+<t_vector>` of integer values.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+The result produced is a floating-point value converted from the input
+integer operand. An inexact floating-point exception will be raised if
+rounding is required.
+
'``llvm.experimental.constrained.fptrunc``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 7650ca074767..d3494f552231 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -312,6 +312,13 @@ namespace ISD {
STRICT_FP_TO_SINT,
STRICT_FP_TO_UINT,
+ /// STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to
+ /// a floating point value. These have the same semantics as sitofp and
+ /// uitofp in IR.
+ /// They are used to limit optimizations while the DAG is being optimized.
+ STRICT_SINT_TO_FP,
+ STRICT_UINT_TO_FP,
+
/// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
/// point type down to the precision of the destination VT. TRUNC is a
/// flag, which is always an integer that is zero or one. If TRUNC is 0,
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 30b7b478d4f2..3bfde5b4ce1d 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -811,6 +811,11 @@ class SelectionDAG {
/// float type VT, by either extending or rounding (by truncation).
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
+ /// Convert Op, which must be a STRICT operation of float type, to the
+ /// float type VT, by either extending or rounding (by truncation).
+ std::pair<SDValue, SDValue>
+ getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT);
+
/// Convert Op, which must be of integer type, to the
/// integer type VT, by either any-extending or truncating it.
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 0726bdfec20e..98498ea67a62 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4123,14 +4123,18 @@ class TargetLowering : public TargetLoweringBase {
/// Expand float to UINT conversion
/// \param N Node to expand
/// \param Result output after conversion
+ /// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
- bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const;
+ bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
+ SelectionDAG &DAG) const;
/// Expand UINT(i64) to double(f64) conversion
/// \param N Node to expand
/// \param Result output after conversion
+ /// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
- bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
+ SelectionDAG &DAG) const;
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def
index 062cf479551f..d0d062b81437 100644
--- a/llvm/include/llvm/IR/ConstrainedOps.def
+++ b/llvm/include/llvm/IR/ConstrainedOps.def
@@ -41,6 +41,8 @@ INSTRUCTION(FMul, 2, 1, experimental_constrained_fmul, FMUL)
INSTRUCTION(FDiv, 2, 1, experimental_constrained_fdiv, FDIV)
INSTRUCTION(FRem, 2, 1, experimental_constrained_frem, FREM)
INSTRUCTION(FPExt, 1, 0, experimental_constrained_fpext, FP_EXTEND)
+INSTRUCTION(SIToFP, 1, 1, experimental_constrained_sitofp, SINT_TO_FP)
+INSTRUCTION(UIToFP, 1, 1, experimental_constrained_uitofp, UINT_TO_FP)
INSTRUCTION(FPToSI, 1, 0, experimental_constrained_fptosi, FP_TO_SINT)
INSTRUCTION(FPToUI, 1, 0, experimental_constrained_fptoui, FP_TO_UINT)
INSTRUCTION(FPTrunc, 1, 1, experimental_constrained_fptrunc, FP_ROUND)
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 7563aa132b8d..1d8f97cc8a19 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -640,6 +640,16 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_sitofp : Intrinsic<[ llvm_anyfloat_ty ],
+ [ llvm_anyint_ty,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+
+ def int_experimental_constrained_uitofp : Intrinsic<[ llvm_anyfloat_ty ],
+ [ llvm_anyint_ty,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+
def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 62fa18024e0c..87c26debb4db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -173,10 +173,9 @@ class SelectionDAGLegalize {
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
- SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT,
- const SDLoc &dl);
- SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
- const SDLoc &dl);
+ SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
+ void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
+ SmallVectorImpl<SDValue> &Results);
void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
@@ -1010,6 +1009,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT:
case ISD::STRICT_LROUND:
@@ -2338,9 +2339,14 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
/// legal for the target.
-SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
- EVT DestVT,
- const SDLoc &dl) {
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
+ SDValue &Chain) {
+ bool isSigned = (Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP);
+ EVT DestVT = Node->getValueType(0);
+ SDLoc dl(Node);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Op0 = Node->getOperand(OpNo);
EVT SrcVT = Op0.getValueType();
// TODO: Should any fast-math-flags be set for the created nodes?
@@ -2387,16 +2393,38 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
BitsToDouble(0x4330000080000000ULL) :
BitsToDouble(0x4330000000000000ULL),
dl, MVT::f64);
- // subtract the bias
- SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
- // final result
- SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
+ // Subtract the bias and get the final result.
+ SDValue Sub;
+ SDValue Result;
+ if (Node->isStrictFPOpcode()) {
+ Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
+ {Node->getOperand(0), Load, Bias});
+ if (DestVT != Sub.getValueType()) {
+ std::pair<SDValue, SDValue> ResultPair;
+ ResultPair =
+ DAG.getStrictFPExtendOrRound(Sub, SDValue(Node, 1), dl, DestVT);
+ Result = ResultPair.first;
+ Chain = ResultPair.second;
+ }
+ else
+ Result = Sub;
+ } else {
+ Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
+ }
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
// Code below here assumes !isSigned without checking again.
+ // FIXME: This can produce slightly incorrect results. See details in
+ // FIXME: https://reviews.llvm.org/D69275
- SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+ SDValue Tmp1;
+ if (Node->isStrictFPOpcode()) {
+ Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
+ { Node->getOperand(0), Op0 });
+ } else
+ Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
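For reference, the bias trick handled above computes the following (a non-strict
IR sketch of the unsigned 32-bit case, illustration only; the hunk wires the same
math through STRICT_FSUB plus a strict extend/round):

    define double @u32_to_f64_bias(i32 %x) {
      ; glue the value into the low bits of 2^52
      ; (integer 4841369599423283200 == bit pattern 0x4330000000000000)
      %wide = zext i32 %x to i64
      %bits = or i64 %wide, 4841369599423283200
      %fp = bitcast i64 %bits to double
      ; subtracting the 2^52 bias leaves exactly %x
      %res = fsub double %fp, 0x4330000000000000
      ret double %res
    }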
@@ -2442,6 +2470,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
FudgeInReg = Handle.getValue();
}
+ if (Node->isStrictFPOpcode()) {
+ SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
+ { Tmp1.getValue(1), Tmp1, FudgeInReg });
+ Chain = Result.getValue(1);
+ return Result;
+ }
+
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
}
@@ -2450,9 +2485,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
/// operation that takes a larger input.
-SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
- bool isSigned,
- const SDLoc &dl) {
+void SelectionDAGLegalize::PromoteLegalINT_TO_FP(
+ SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ EVT DestVT = N->getValueType(0);
+ SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
+ unsigned UIntOp = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP;
+ unsigned SIntOp = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP;
+
// First step, figure out the appropriate *INT_TO_FP operation to use.
EVT NewInTy = LegalOp.getValueType();
@@ -2464,15 +2506,16 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
assert(NewInTy.isInteger() && "Ran out of possibilities!");
// If the target supports SINT_TO_FP of this type, use it.
- if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
- OpToUse = ISD::SINT_TO_FP;
+ if (TLI.isOperationLegalOrCustom(SIntOp, NewInTy)) {
+ OpToUse = SIntOp;
break;
}
- if (isSigned) continue;
+ if (IsSigned)
+ continue;
// If the target supports UINT_TO_FP of this type, use it.
- if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
- OpToUse = ISD::UINT_TO_FP;
+ if (TLI.isOperationLegalOrCustom(UIntOp, NewInTy)) {
+ OpToUse = UIntOp;
break;
}
@@ -2481,9 +2524,20 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
// Okay, we found the operation and type to use. Zero extend our input to the
// desired type then run the operation on it.
- return DAG.getNode(OpToUse, dl, DestVT,
- DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
- dl, NewInTy, LegalOp));
+ if (IsStrict) {
+ SDValue Res =
+ DAG.getNode(OpToUse, dl, {DestVT, MVT::Other},
+ {N->getOperand(0),
+ DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp)});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ return;
+ }
+
+ Results.push_back(
+ DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp)));
}
/// This function is responsible for legalizing a
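Promotion in PromoteLegalINT_TO_FP above simply widens the integer and converts
at the wider type; for an unsigned i16 input the effect in IR terms is (non-strict
sketch, illustration only -- the strict form additionally threads the chain
operand through):

    %wide = zext i16 %x to i32
    %res = sitofp i32 %wide to float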
@@ -2899,15 +2953,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::UINT_TO_FP:
- if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
+ case ISD::STRICT_UINT_TO_FP:
+ if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) {
Results.push_back(Tmp1);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Tmp2);
break;
}
LLVM_FALLTHROUGH;
case ISD::SINT_TO_FP:
- Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
- Node->getOperand(0), Node->getValueType(0), dl);
+ case ISD::STRICT_SINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2);
Results.push_back(Tmp1);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Tmp2);
break;
case ISD::FP_TO_SINT:
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
@@ -4194,6 +4253,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
OVT = Node->getOperand(0).getSimpleValueType();
}
+ if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
+ OVT = Node->getOperand(1).getSimpleValueType();
if (Node->getOpcode() == ISD::BR_CC)
OVT = Node->getOperand(2).getSimpleValueType();
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
@@ -4248,10 +4310,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
PromoteLegalFP_TO_INT(Node, dl, Results);
break;
case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
- Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
- Node->getOpcode() == ISD::SINT_TO_FP, dl);
- Results.push_back(Tmp1);
+ case ISD::STRICT_SINT_TO_FP:
+ PromoteLegalINT_TO_FP(Node, dl, Results);
break;
case ISD::VAARG: {
SDValue Chain = Node->getOperand(0); // Get the chain.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 1ef62921ab8d..0292f1428a09 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -307,23 +307,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ EVT ValVT;
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Result);
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
- Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ ValVT = Node->getValueType(0);
+ if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
+ ValVT = Node->getOperand(1).getValueType();
+ Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
// If we're asked to expand a strict vector floating-point operation,
// by default we're going to simply unroll it. That is usually the
// best approach, except in the case where the resulting strict (scalar)
// operations would themselves use the fallback mutation to non-strict.
// In that specific case, just do the fallback on the vector op.
if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
- TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getValueType(0))
- == TargetLowering::Legal) {
- EVT EltVT = Node->getValueType(0).getVectorElementType();
+ TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
+ TargetLowering::Legal) {
+ EVT EltVT = ValVT.getVectorElementType();
if (TLI.getOperationAction(Node->getOpcode(), EltVT)
== TargetLowering::Expand &&
TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
@@ -1153,18 +1157,29 @@ SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
}
SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
- EVT VT = Op.getOperand(0).getValueType();
+ bool IsStrict = Op.getNode()->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Src = Op.getOperand(OpNo);
+ EVT VT = Src.getValueType();
SDLoc DL(Op);
// Attempt to expand using TargetLowering.
SDValue Result;
- if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG))
+ SDValue Chain;
+ if (TLI.expandUINT_TO_FP(Op.getNode(), Result, Chain, DAG)) {
+ if (IsStrict)
+ // Relink the chain
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
return Result;
+ }
// Make sure that the SINT_TO_FP and SRL instructions are available.
- if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
- return DAG.UnrollVectorOp(Op.getNode());
+ if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
+ TargetLowering::Expand) ||
+ (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
+ TargetLowering::Expand)) ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+ return IsStrict ? SDValue() : DAG.UnrollVectorOp(Op.getNode());
unsigned BW = VT.getScalarSizeInBits();
assert((BW == 64 || BW == 32) &&
@@ -1182,8 +1197,31 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
// Clear upper part of LO, lower HI
- SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
- SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
+
+ if (IsStrict) {
+ // Convert hi and lo to floats
+ // Convert the hi part back to the upper values
+ // TODO: Can any fast-math-flags be set on these nodes?
+ SDValue fHI =
+ DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {Op.getValueType(), MVT::Other},
+ {Op.getOperand(0), HI});
+ fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Op.getValueType(), MVT::Other},
+ {SDValue(fHI.getNode(), 1), fHI, TWOHW});
+ SDValue fLO =
+ DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {Op.getValueType(), MVT::Other},
+ {SDValue(fHI.getNode(), 1), LO});
+
+ // Add the two halves
+ SDValue Result =
+ DAG.getNode(ISD::STRICT_FADD, DL, {Op.getValueType(), MVT::Other},
+ {SDValue(fLO.getNode(), 1), fHI, fLO});
+
+ // Relink the chain
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), SDValue(Result.getNode(), 1));
+ return Result;
+ }
// Convert hi and lo to floats
// Convert the hi part back to the upper values
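The expansion above splits each unsigned lane into two halves, converts both
halves as signed values, and reassembles them in FP. A scalar, non-strict sketch
of the same math for a 32-bit lane (illustration only):

    %hi = lshr i32 %x, 16
    %lo = and i32 %x, 65535
    %fhi = sitofp i32 %hi to float
    %flo = sitofp i32 %lo to float
    %scaled = fmul float %fhi, 65536.0      ; scale by 2^16, exact
    %res = fadd float %scaled, %flo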
@@ -1318,7 +1356,12 @@ SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
}
SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
- EVT VT = Op.getValueType();
+ if (Op.getOpcode() == ISD::STRICT_UINT_TO_FP) {
+ if (SDValue Res = ExpandUINT_TO_FLOAT(Op))
+ return Res;
+ }
+
+ EVT VT = Op.getValue(0).getValueType();
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
unsigned NumOpers = Op.getNumOperands();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 200dacd27bc6..3741cae63e5f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -572,6 +572,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
@@ -1931,9 +1933,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
+ if (N->getValueType(0).bitsLT(
+ N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
@@ -2494,7 +2499,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
//
// Without this transform, the original truncate would end up being
// scalarized, which is pretty much always a last resort.
- SDValue InVec = N->getOperand(0);
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ SDValue InVec = N->getOperand(OpNo);
EVT InVT = InVec->getValueType(0);
EVT OutVT = N->getValueType(0);
unsigned NumElements = OutVT.getVectorNumElements();
@@ -2538,8 +2544,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
- SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
- SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
+
+ SDValue HalfLo;
+ SDValue HalfHi;
+ SDValue Chain;
+ if (N->isStrictFPOpcode()) {
+ HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
+ {N->getOperand(0), InLoVec});
+ HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
+ {N->getOperand(0), InHiVec});
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
+ HalfHi.getValue(1));
+ } else {
+ HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+ HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
+ }
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
@@ -2548,6 +2569,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
+
+ if (N->isStrictFPOpcode()) {
+ SDValue Res = DAG.getNode(
+ ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other},
+ {Chain, InterVec,
+ DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))});
+ // Relink the chain
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
+ return Res;
+ }
+
return IsFloat
? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
DAG.getTargetConstant(
@@ -3000,6 +3032,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
return WidenVecRes_Convert_StrictFP(N);
default:
break;
@@ -4120,7 +4154,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::SINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::TRUNCATE:
Res = WidenVecOp_Convert(N);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 472540765fe3..c33233efb023 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1117,6 +1117,20 @@ SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
: getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
}
+std::pair<SDValue, SDValue>
+SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain,
+ const SDLoc &DL, EVT VT) {
+ assert(!VT.bitsEq(Op.getValueType()) &&
+ "Strict no-op FP extend/round not allowed.");
+ SDValue Res =
+ VT.bitsGT(Op.getValueType())
+ ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op})
+ : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
+ {Chain, Op, getIntPtrConstant(0, DL)});
+
+ return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1));
+}
+
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 94e091189dbb..b5f8a3bc934c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -326,7 +326,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 888f1527d1c0..53cbe9d060e9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6116,8 +6116,10 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
SelectionDAG &DAG) const {
- SDValue Src = Node->getOperand(0);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -6140,7 +6142,13 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundidf in compiler_rt.
- SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
+ SDValue Fast;
+ if (Node->isStrictFPOpcode()) {
+ Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
+ {Node->getOperand(0), Src});
+ Chain = SDValue(Fast.getNode(), 1);
+ } else
+ Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
@@ -6148,8 +6156,17 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
- SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
- SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+ SDValue Slow;
+ if (Node->isStrictFPOpcode()) {
+ SDValue SignCvt = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl,
+ {DstVT, MVT::Other}, {Chain, Or});
+ Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
+ { SignCvt.getValue(1), SignCvt, SignCvt });
+ Chain = Slow.getValue(1);
+ } else {
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
+ Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+ }
// TODO: This really should be implemented using a branch rather than a
// select. We happen to get lucky and machinesink does the right
@@ -6192,8 +6209,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
- SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
- Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ if (Node->isStrictFPOpcode()) {
+ SDValue HiSub =
+ DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
+ {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
+ Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
+ {HiSub.getValue(1), LoFlt, HiSub});
+ Chain = Result.getValue(1);
+ } else {
+ SDValue HiSub =
+ DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ }
return true;
}
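The "algorithm from the x86_64 __floatundidf" referenced above amounts to the
following (non-strict IR sketch, illustration only; the strict variant threads
the chain through the STRICT_* nodes as in the hunks above):

    define double @u64_to_f64(i64 %x) {
      %fast = sitofp i64 %x to double   ; correct when the sign bit is clear
      %shr = lshr i64 %x, 1
      %and = and i64 %x, 1
      %or = or i64 %and, %shr           ; halve, keeping the low bit for rounding
      %cvt = sitofp i64 %or to double
      %slow = fadd double %cvt, %cvt    ; double the halved conversion
      %neg = icmp slt i64 %x, 0
      %res = select i1 %neg, double %slow, double %fast
      ret double %res
    }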
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3a7da8650836..e3a3d91b455b 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4804,6 +4804,28 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
}
break;
+ case Intrinsic::experimental_constrained_sitofp:
+ case Intrinsic::experimental_constrained_uitofp: {
+ Value *Operand = FPI.getArgOperand(0);
+ uint64_t NumSrcElem = 0;
+ Assert(Operand->getType()->isIntOrIntVectorTy(),
+ "Intrinsic first argument must be integer", &FPI);
+ if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
+ NumSrcElem = OperandT->getNumElements();
+ }
+
+ Operand = &FPI;
+ Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
+ "Intrinsic first argument and result disagree on vector use", &FPI);
+ Assert(Operand->getType()->isFPOrFPVectorTy(),
+ "Intrinsic result must be a floating point", &FPI);
+ if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
+ Assert(NumSrcElem == OperandT->getNumElements(),
+ "Intrinsic first argument and result vector lengths must be equal",
+ &FPI);
+ }
+ } break;
+
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext: {
Value *Operand = FPI.getArgOperand(0);
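The sitofp/uitofp verifier checks added above enforce that the operand and the
result agree on vector-ness and on element count. A well-formed vector call would
look like the following (the .v4f32.v4i32 overload suffix is assumed here from the
usual type-mangling convention; it is not taken from this patch):

    %v = call <4 x float>
           @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x,
                     metadata !"round.dynamic",
                     metadata !"fpexcept.strict") #0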
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2dcd2eec3853..8df28ae2503e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -228,26 +228,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat()) {
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
// Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
// SSE has no i16 to fp conversion, only i32. We promote in the handler
// to allow f80 to use i16 and f64 to use i16 with sse1 only
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
// f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
// Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
@@ -985,9 +993,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
@@ -18421,8 +18432,13 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert((Op.getOpcode() == ISD::SINT_TO_FP ||
- Op.getOpcode() == ISD::UINT_TO_FP) && "Unexpected opcode!");
- SDValue Src = Op.getOperand(0);
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
+ Op.getOpcode() == ISD::UINT_TO_FP) &&
+ "Unexpected opcode!");
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Src = Op.getOperand(OpNo);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
@@ -18439,7 +18455,17 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
+ if (IsStrict) {
+ SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
+ {Op.getOperand(0), InVec});
+ SDValue Chain = CvtVec.getValue(1);
+ SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getMergeValues({Value, Chain}, dl);
+ }
+
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);
+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
DAG.getIntPtrConstant(0, dl));
}
@@ -18510,7 +18536,9 @@ static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- SDValue Src = Op.getOperand(0);
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Src = Op.getOperand(OpNo);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
@@ -18519,7 +18547,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return Extract;
if (SrcVT.isVector()) {
- if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
+ if (SrcVT == MVT::v2i32 && VT == MVT::v2f64 && !IsStrict) {
+ // FIXME: A strict version of CVTSI2P is needed.
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
@@ -18545,13 +18574,17 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
// SSE doesn't have an i16 conversion so we need to promote.
if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+ {Op.getOperand(0), Ext});
+
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
}
if (VT == MVT::f128)
return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
- SDValue ValueToStore = Op.getOperand(0);
+ SDValue ValueToStore = Src;
if (SrcVT == MVT::i64 && UseSSEReg && !Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
@@ -18563,10 +18596,16 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- SDValue Chain = DAG.getStore(
- DAG.getEntryNode(), dl, ValueToStore, StackSlot,
+ SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
+ Chain = DAG.getStore(
+ Chain, dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
- return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG).first;
+ std::pair<SDValue, SDValue> Tmp = BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+
+ if (IsStrict)
+ return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
+
+ return Tmp.first;
}
std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
@@ -18654,6 +18693,8 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
#endif
*/
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
SDLoc dl(Op);
LLVMContext *Context = DAG.getContext();
@@ -18674,8 +18715,8 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
// Load the 64-bit value into an XMM register.
- SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
- Op.getOperand(0));
+ SDValue XR1 =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(OpNo));
SDValue CLod0 =
DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
@@ -18688,32 +18729,50 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
/* Alignment = */ 16);
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
+ SDValue Sub;
+ SDValue Chain;
// TODO: Are there any fast-math-flags to propagate here?
- SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+ if (IsStrict) {
+ Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
+ {Op.getOperand(0), XR2F, CLod1});
+ Chain = Sub.getValue(1);
+ } else
+ Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
- if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
+ if (!IsStrict && Subtarget.hasSSE3() &&
+ shouldUseHorizontalOp(true, DAG, Subtarget)) {
+ // FIXME: Do we need a STRICT version of FHADD?
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
- Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
+ if (IsStrict) {
+ Result = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v2f64, MVT::Other},
+ {Chain, Shuffle, Sub});
+ Chain = Result.getValue(1);
+ } else
+ Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
+ DAG.getIntPtrConstant(0, dl));
+ if (IsStrict)
+ return DAG.getMergeValues({Result, Chain}, dl);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
- DAG.getIntPtrConstant(0, dl));
+ return Result;
}
/// 32-bit unsigned integer to float expansion.
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDLoc dl(Op);
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
MVT::f64);
// Load the 32-bit value into an XMM register.
- SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
- Op.getOperand(0));
+ SDValue Load =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(OpNo));
// Zero out the upper parts of the register.
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
@@ -18733,6 +18792,23 @@ static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
+ if (Op.getNode()->isStrictFPOpcode()) {
+ // Subtract the bias.
+ // TODO: Are there any fast-math-flags to propagate here?
+ SDValue Chain = Op.getOperand(0);
+ SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
+ {Chain, Or, Bias});
+
+ if (Op.getValueType() == Sub.getValueType())
+ return Sub;
+
+ // Handle final rounding.
+ std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
+ Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
+
+ return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
+ }
+
// Subtract the bias.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
@@ -18747,6 +18823,10 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
if (Op.getSimpleValueType() != MVT::v2f64)
return SDValue();
+ // FIXME: Need to fix the lack of StrictFP support here.
+ if (Op.getNode()->isStrictFPOpcode())
+ return SDValue();
+
SDValue N0 = Op.getOperand(0);
assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");
@@ -18873,7 +18953,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- SDValue N0 = Op.getOperand(0);
+ unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+ SDValue N0 = Op.getOperand(OpNo);
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
@@ -18891,11 +18972,14 @@ static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- SDValue N0 = Op.getOperand(0);
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Src = Op.getOperand(OpNo);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
- MVT SrcVT = N0.getSimpleValueType();
+ MVT SrcVT = Src.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
+ SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
if (DstVT == MVT::f128)
return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
@@ -18915,8 +18999,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Promote i32 to i64 and use a signed conversion on 64-bit targets.
if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N0);
- return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, N0);
+ Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
+ {Chain, Src});
+ return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
}
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
@@ -18933,22 +19020,28 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
- SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, MachinePointerInfo());
+ SDValue Store1 =
+ DAG.getStore(Chain, dl, Src, StackSlot, MachinePointerInfo());
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
OffsetSlot, MachinePointerInfo());
- return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG).first;
+ std::pair<SDValue, SDValue> Tmp =
+ BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ if (IsStrict)
+ return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
+
+ return Tmp.first;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
- SDValue ValueToStore = Op.getOperand(0);
- if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit())
+ SDValue ValueToStore = Src;
+ if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore, StackSlot,
- MachinePointerInfo());
+ }
+ SDValue Store =
+ DAG.getStore(Chain, dl, ValueToStore, StackSlot, MachinePointerInfo());
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
// DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
@@ -18963,13 +19056,14 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue Ops[] = { Store, StackSlot };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
MVT::i64, MMO);
+ Chain = Fild.getValue(1);
APInt FF(32, 0x5F800000ULL);
// Check whether the sign bit is set.
SDValue SignSet = DAG.getSetCC(
dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
- Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
+ Op.getOperand(OpNo), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
SDValue FudgePtr = DAG.getConstantPool(
@@ -18984,11 +19078,18 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(
- ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr,
+ ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
/* Alignment = */ 4);
+ Chain = Fudge.getValue(1);
// Extend everything to 80 bits to force it to be done on x87.
// TODO: Are there any fast-math-flags to propagate here?
+ if (IsStrict) {
+ SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
+ {Chain, Fild, Fudge});
+ return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
+ {Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
+ }
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));
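The fudge-factor path being made strict here converts the i64 bits as if signed
(FILD) and then conditionally adds 2^64 when the sign bit was set. A rough
non-strict sketch in IR, using double for brevity where the real code stays at
x87 f80 precision until the final round:

    %signed = sitofp i64 %x to double   ; FILD interprets the bits as signed
    %neg = icmp slt i64 %x, 0
    %fudge = select i1 %neg, double 0x43F0000000000000, double 0.0   ; 2^64 or 0.0
    %res = fadd double %signed, %fudge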
@@ -19042,10 +19143,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- if (IsStrict)
- Chain = Op.getOperand(0);
- else
- Chain = DAG.getEntryNode();
+ Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
@@ -28013,7 +28111,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
+ case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::STRICT_UINT_TO_FP:
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index eb903abdf8fb..124a43f53ddf 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1941,6 +1941,762 @@ entry:
ret i64 %result
}
+; Verify that sitofp(%x) isn't simplified when the rounding mode is
+; unknown.
+; Verify that no gross errors happen.
+define double @sifdb(i8 %x) #0 {
+; X87-LABEL: sifdb:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: sifdb:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: sifdb:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movsbl %dil, %eax
+; SSE-NEXT: cvtsi2sd %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sifdb:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movsbl %dil, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+define double @sifdw(i16 %x) #0 {
+; X87-LABEL: sifdw:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: sifdw:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: sifdw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movswl %di, %eax
+; SSE-NEXT: cvtsi2sd %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sifdw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movswl %di, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+define double @sifdi(i32 %x) #0 {
+; X87-LABEL: sifdi:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: fildl (%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: sifdi:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: sifdi:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: cvtsi2sd %edi, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sifdi:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+define float @siffb(i8 %x) #0 {
+; X87-LABEL: siffb:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: siffb:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: siffb:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movsbl %dil, %eax
+; SSE-NEXT: cvtsi2ss %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: siffb:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movsbl %dil, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret float %result
+}
+
+define float @siffw(i16 %x) #0 {
+; X87-LABEL: siffw:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: siffw:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: siffw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movswl %di, %eax
+; SSE-NEXT: cvtsi2ss %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: siffw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movswl %di, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret float %result
+}
+
+define float @siffi(i32 %x) #0 {
+; X87-LABEL: siffi:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: fildl (%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: siffi:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: siffi:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: cvtsi2ss %edi, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: siffi:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret float %result
+}
+
+define double @sifdl(i64 %x) #0 {
+; X87-LABEL: sifdl:
+; X87: # %bb.0: # %entry
+; X87-NEXT: subl $12, %esp
+; X87-NEXT: .cfi_def_cfa_offset 16
+; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: fildll (%esp)
+; X87-NEXT: addl $12, %esp
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: sifdl:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $20, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 24
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fstpl (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $20, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: sifdl:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: cvtsi2sd %rdi, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sifdl:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+define float @siffl(i64 %x) #0 {
+; X87-LABEL: siffl:
+; X87: # %bb.0: # %entry
+; X87-NEXT: subl $12, %esp
+; X87-NEXT: .cfi_def_cfa_offset 16
+; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: fildll (%esp)
+; X87-NEXT: addl $12, %esp
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: siffl:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $20, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 24
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: flds {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: addl $20, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: siffl:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: cvtsi2ss %rdi, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: siffl:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret float %result
+}
+
+; Verify that uitofp(%x) isn't simplified when the rounding mode is
+; unknown.
+; Verify that no gross errors happen.
+define double @uifdb(i8 %x) #0 {
+; X87-LABEL: uifdb:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uifdb:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uifdb:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movzbl %dil, %eax
+; SSE-NEXT: cvtsi2sd %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uifdb:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movzbl %dil, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+define double @uifdw(i16 %x) #0 {
+; X87-LABEL: uifdw:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: fildl (%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uifdw:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uifdw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movzwl %di, %eax
+; SSE-NEXT: cvtsi2sd %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uifdw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movzwl %di, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+define double @uifdi(i32 %x) #0 {
+; X87-LABEL: uifdi:
+; X87: # %bb.0: # %entry
+; X87-NEXT: subl $12, %esp
+; X87-NEXT: .cfi_def_cfa_offset 16
+; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X87-NEXT: fildll (%esp)
+; X87-NEXT: addl $12, %esp
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uifdi:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: orpd %xmm0, %xmm1
+; X86-SSE-NEXT: subsd %xmm0, %xmm1
+; X86-SSE-NEXT: movsd %xmm1, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uifdi:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: cvtsi2sd %rax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uifdi:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: movl %edi, %eax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: uifdi:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+define double @uifdl(i64 %x) #0 {
+; X87-LABEL: uifdl:
+; X87: # %bb.0: # %entry
+; X87-NEXT: subl $20, %esp
+; X87-NEXT: .cfi_def_cfa_offset 24
+; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: xorl %eax, %eax
+; X87-NEXT: testl %ecx, %ecx
+; X87-NEXT: setns %al
+; X87-NEXT: fildll (%esp)
+; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
+; X87-NEXT: fstpl {{[0-9]+}}(%esp)
+; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: addl $20, %esp
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uifdl:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X86-SSE-NEXT: subpd {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: movapd %xmm0, %xmm1
+; X86-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; X86-SSE-NEXT: addpd %xmm0, %xmm1
+; X86-SSE-NEXT: movlpd %xmm1, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uifdl:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movq %rdi, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; SSE-NEXT: subpd {{.*}}(%rip), %xmm1
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT: addpd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uifdl:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: uifdl:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %result = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+define float @uiffb(i8 %x) #0 {
+; X87-LABEL: uiffb:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uiffb:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uiffb:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movzbl %dil, %eax
+; SSE-NEXT: cvtsi2ss %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uiffb:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movzbl %dil, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret float %result
+}
+
+define float @uiffw(i16 %x) #0 {
+; X87-LABEL: uiffw:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: fildl (%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uiffw:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uiffw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movzwl %di, %eax
+; SSE-NEXT: cvtsi2ss %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uiffw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movzwl %di, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret float %result
+}
+
+define float @uiffi(i32 %x) #0 {
+; X87-LABEL: uiffi:
+; X87: # %bb.0: # %entry
+; X87-NEXT: subl $12, %esp
+; X87-NEXT: .cfi_def_cfa_offset 16
+; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X87-NEXT: fildll (%esp)
+; X87-NEXT: addl $12, %esp
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uiffi:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: orpd %xmm0, %xmm1
+; X86-SSE-NEXT: subsd %xmm0, %xmm1
+; X86-SSE-NEXT: xorps %xmm0, %xmm0
+; X86-SSE-NEXT: cvtsd2ss %xmm1, %xmm0
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uiffi:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: cvtsi2ss %rax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uiffi:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: movl %edi, %eax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: uiffi:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %result = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret float %result
+}
+
+define float @uiffl(i64 %x) #0 {
+; X87-LABEL: uiffl:
+; X87: # %bb.0: # %entry
+; X87-NEXT: subl $20, %esp
+; X87-NEXT: .cfi_def_cfa_offset 24
+; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X87-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X87-NEXT: xorl %eax, %eax
+; X87-NEXT: testl %ecx, %ecx
+; X87-NEXT: setns %al
+; X87-NEXT: fildll {{[0-9]+}}(%esp)
+; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
+; X87-NEXT: fstps {{[0-9]+}}(%esp)
+; X87-NEXT: flds {{[0-9]+}}(%esp)
+; X87-NEXT: addl $20, %esp
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uiffl:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $20, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 24
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: xorl %eax, %eax
+; X86-SSE-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: setns %al
+; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
+; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: addl $20, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uiffl:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: testq %rdi, %rdi
+; SSE-NEXT: js .LBB52_1
+; SSE-NEXT: # %bb.2: # %entry
+; SSE-NEXT: cvtsi2ss %rdi, %xmm0
+; SSE-NEXT: retq
+; SSE-NEXT: .LBB52_1:
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: shrq %rax
+; SSE-NEXT: andl $1, %edi
+; SSE-NEXT: orq %rax, %rdi
+; SSE-NEXT: cvtsi2ss %rdi, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uiffl:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: testq %rdi, %rdi
+; AVX1-NEXT: js .LBB52_1
+; AVX1-NEXT: # %bb.2: # %entry
+; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX1-NEXT: retq
+; AVX1-NEXT: .LBB52_1:
+; AVX1-NEXT: movq %rdi, %rax
+; AVX1-NEXT: shrq %rax
+; AVX1-NEXT: andl $1, %edi
+; AVX1-NEXT: orq %rax, %rdi
+; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: uiffl:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %result = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret float %result
+}
+
attributes #0 = { strictfp }
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
@@ -1981,3 +2737,19 @@ declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 8ad4aad204cf..ccdda7399a03 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -5786,6 +5786,1513 @@ entry:
ret <3 x double> %trunc
}
+define <1 x double> @constrained_vector_sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvtsi2sd %edi, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call <1 x double>
+ @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x double> %result
+}
+
+define <1 x float> @constrained_vector_sitofp_v1f32_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvtsi2ss %edi, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call <1 x float>
+ @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %result
+}
+
+define <1 x double> @constrained_vector_sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvtsi2sd %rdi, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call <1 x double>
+ @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x double> %result
+}
+
+define <1 x float> @constrained_vector_sitofp_v1f32_v1i64(<1 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call <1 x float>
+ @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %result
+}
+
+define <2 x double> @constrained_vector_sitofp_v2f64_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: cvtsi2sd %eax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2sd %eax, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vextractps $1, %xmm0, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm1, %xmm1
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm0
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+entry:
+ %result = call <2 x double>
+ @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %result
+}
+
+define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: cvtsi2ss %eax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ss %eax, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vextractps $1, %xmm0, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: retq
+entry:
+ %result = call <2 x float>
+ @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x float> %result
+}
+
+define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: cvtsi2sd %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2sd %rax, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+entry:
+ %result = call <2 x double>
+ @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %result
+}
+
+define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: retq
+entry:
+ %result = call <2 x float>
+ @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x float> %result
+}
+
+define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: cvtsi2sd %eax, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2sd %eax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2sd %eax, %xmm0
+; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vextractps $1, %xmm0, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm1, %xmm1
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm2
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm3, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: retq
+entry:
+ %result = call <3 x double>
+ @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %result
+}
+
+define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: cvtsi2ss %eax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; CHECK-NEXT: movd %xmm2, %eax
+; CHECK-NEXT: xorps %xmm2, %xmm2
+; CHECK-NEXT: cvtsi2ss %eax, %xmm2
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ss %eax, %xmm0
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vextractps $1, %xmm0, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm2
+; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm3, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX-NEXT: retq
+entry:
+ %result = call <3 x float>
+ @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %result
+}
+
+define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvtsi2sd %rdi, %xmm0
+; CHECK-NEXT: cvtsi2sd %rsi, %xmm1
+; CHECK-NEXT: cvtsi2sd %rdx, %xmm2
+; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_sitofp_v3f64_v3i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
+; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <3 x double>
+ @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %result
+}
+
+define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvtsi2ss %rsi, %xmm1
+; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_sitofp_v3f32_v3i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_sitofp_v3f32_v3i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %result = call <3 x float>
+ @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %result
+}
+
+define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: cvtsi2sd %eax, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2sd %eax, %xmm1
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: cvtsi2sd %eax, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2sd %eax, %xmm1
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v4f64_v4i32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT: retq
+entry:
+ %result = call <4 x double>
+ @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %result
+}
+
+define <4 x float> @constrained_vector_sitofp_v4f32_v4i32(<4 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v4f32_v4i32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %result = call <4 x float>
+ @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x float> %result
+}
+
+define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: cvtsi2sd %rax, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2sd %rax, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: cvtsi2sd %rax, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2sd %rax, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: movapd %xmm3, %xmm1
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_sitofp_v4f64_v4i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrq $1, %xmm1, %rax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
+; AVX1-NEXT: vmovq %xmm1, %rax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_sitofp_v4f64_v4i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vpextrq $1, %xmm1, %rax
+; AVX512-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
+; AVX512-NEXT: vmovq %xmm1, %rax
+; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <4 x double>
+ @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %result
+}
+
+define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: cvtsi2ss %rax, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_sitofp_v4f32_v4i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_sitofp_v4f32_v4i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %result = call <4 x float>
+ @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x float> %result
+}
+
+define <1 x double> @constrained_vector_uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: cvtsi2sd %rax, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i32:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: movl %edi, %eax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <1 x double>
+ @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x double> %result
+}
+
+define <1 x float> @constrained_vector_uitofp_v1f32_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i32:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: movl %edi, %eax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <1 x float>
+ @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %result
+}
+
+define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %rdi, %xmm1
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK-NEXT: subpd {{.*}}(%rip), %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-NEXT: addpd %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <1 x double>
+ @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x double> %result
+}
+
+define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: js .LBB170_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB170_1:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: shrq %rax
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: orq %rax, %rdi
+; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: testq %rdi, %rdi
+; AVX1-NEXT: js .LBB170_1
+; AVX1-NEXT: # %bb.2: # %entry
+; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX1-NEXT: retq
+; AVX1-NEXT: .LBB170_1:
+; AVX1-NEXT: movq %rdi, %rax
+; AVX1-NEXT: shrq %rax
+; AVX1-NEXT: andl $1, %edi
+; AVX1-NEXT: orq %rax, %rdi
+; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <1 x float>
+ @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %result
+}
+
+define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: cvtsi2sd %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2sd %rax, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i32:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractps $1, %xmm0, %eax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vextractps $1, %xmm0, %eax
+; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm0
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: retq
+entry:
+ %result = call <2 x double>
+ @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %result
+}
+
+define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i32:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractps $1, %xmm0, %eax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vextractps $1, %xmm0, %eax
+; AVX512-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vcvtusi2ss %eax, %xmm2, %xmm0
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX512-NEXT: retq
+entry:
+ %result = call <2 x float>
+ @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x float> %result
+}
+
+define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295]
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: por {{.*}}(%rip), %xmm1
+; CHECK-NEXT: psrlq $32, %xmm0
+; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: addpd %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; AVX1-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX512-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <2 x double>
+ @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %result
+}
+
+define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: js .LBB174_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: jns .LBB174_5
+; CHECK-NEXT: .LBB174_4:
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB174_1:
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: js .LBB174_4
+; CHECK-NEXT: .LBB174_5: # %entry
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: js .LBB174_1
+; AVX1-NEXT: # %bb.2: # %entry
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: jns .LBB174_5
+; AVX1-NEXT: .LBB174_4:
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: orq %rcx, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX1-NEXT: retq
+; AVX1-NEXT: .LBB174_1:
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: orq %rcx, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: js .LBB174_4
+; AVX1-NEXT: .LBB174_5: # %entry
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX512-NEXT: retq
+entry:
+ %result = call <2 x float>
+ @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x float> %result
+}
+
+define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: cvtsi2sd %rax, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2sd %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2sd %rax, %xmm0
+; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i32:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractps $1, %xmm0, %eax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vpextrd $2, %xmm0, %eax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vextractps $1, %xmm0, %eax
+; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm2
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT: vpextrd $2, %xmm0, %eax
+; AVX512-NEXT: vcvtusi2sd %eax, %xmm3, %xmm0
+; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <3 x double>
+ @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %result
+}
+
+define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; CHECK-NEXT: movd %xmm2, %eax
+; CHECK-NEXT: xorps %xmm2, %xmm2
+; CHECK-NEXT: cvtsi2ss %rax, %xmm2
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i32:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractps $1, %xmm0, %eax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX1-NEXT: vpextrd $2, %xmm0, %eax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vextractps $1, %xmm0, %eax
+; AVX512-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vcvtusi2ss %eax, %xmm2, %xmm2
+; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512-NEXT: vpextrd $2, %xmm0, %eax
+; AVX512-NEXT: vcvtusi2ss %eax, %xmm3, %xmm0
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX512-NEXT: retq
+entry:
+ %result = call <3 x float>
+ @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %result
+}
+
+define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %rdi, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,1.9342813113834067E+25]
+; CHECK-NEXT: subpd %xmm3, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-NEXT: addpd %xmm1, %xmm0
+; CHECK-NEXT: movq %rsi, %xmm4
+; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
+; CHECK-NEXT: subpd %xmm3, %xmm4
+; CHECK-NEXT: movapd %xmm4, %xmm1
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
+; CHECK-NEXT: addpd %xmm4, %xmm1
+; CHECK-NEXT: movq %rdx, %xmm4
+; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
+; CHECK-NEXT: subpd %xmm3, %xmm4
+; CHECK-NEXT: movapd %xmm4, %xmm2
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm4[1]
+; CHECK-NEXT: addpd %xmm4, %xmm2
+; CHECK-NEXT: movlpd %xmm2, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
+; AVX1-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [4.503599627370496E+15,1.9342813113834067E+25]
+; AVX1-NEXT: vsubpd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,0]
+; AVX1-NEXT: vaddpd %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; AVX1-NEXT: vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; AVX1-NEXT: vsubpd %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpermilpd {{.*#+}} xmm5 = xmm4[1,0]
+; AVX1-NEXT: vaddpd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm4[0]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vsubpd %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0
+; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <3 x double>
+ @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %result
+}
+
+define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: js .LBB178_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: cvtsi2ss %rsi, %xmm1
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: jns .LBB178_5
+; CHECK-NEXT: .LBB178_4:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: shrq %rax
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: orq %rax, %rdi
+; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: jns .LBB178_8
+; CHECK-NEXT: .LBB178_7:
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: shrq %rax
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB178_1:
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shrq %rax
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: orq %rax, %rsi
+; CHECK-NEXT: cvtsi2ss %rsi, %xmm1
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: js .LBB178_4
+; CHECK-NEXT: .LBB178_5: # %entry
+; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: js .LBB178_7
+; CHECK-NEXT: .LBB178_8: # %entry
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: js .LBB178_1
+; AVX1-NEXT: # %bb.2: # %entry
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: jns .LBB178_5
+; AVX1-NEXT: .LBB178_4:
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: orq %rcx, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: jmp .LBB178_6
+; AVX1-NEXT: .LBB178_1:
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: orq %rcx, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: js .LBB178_4
+; AVX1-NEXT: .LBB178_5: # %entry
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX1-NEXT: .LBB178_6: # %entry
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: js .LBB178_7
+; AVX1-NEXT: # %bb.8: # %entry
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+; AVX1-NEXT: .LBB178_7:
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: orq %rcx, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
+; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %result = call <3 x float>
+ @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %result
+}
+
+define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: cvtsi2sd %rax, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2sd %rax, %xmm1
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: cvtsi2sd %rax, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2sd %rax, %xmm1
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i32:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX512-NEXT: vcvtdq2pd %xmm1, %ymm1
+; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
+; AVX512-NEXT: vmulpd %ymm2, %ymm1, %ymm1
+; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; AVX512-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX512-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <4 x double>
+ @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %result
+}
+
+define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; CHECK-NEXT: andps %xmm0, %xmm1
+; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
+; CHECK-NEXT: psrld $16, %xmm0
+; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: addps %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i32:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX1-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
+; AVX512-NEXT: vmulps %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; AVX512-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX512-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <4 x float>
+ @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x float> %result
+}
+
+define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: pand %xmm2, %xmm3
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
+; CHECK-NEXT: por %xmm4, %xmm3
+; CHECK-NEXT: psrlq $32, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
+; CHECK-NEXT: por %xmm5, %xmm0
+; CHECK-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
+; CHECK-NEXT: subpd %xmm6, %xmm0
+; CHECK-NEXT: addpd %xmm3, %xmm0
+; CHECK-NEXT: pand %xmm1, %xmm2
+; CHECK-NEXT: por %xmm4, %xmm2
+; CHECK-NEXT: psrlq $32, %xmm1
+; CHECK-NEXT: por %xmm5, %xmm1
+; CHECK-NEXT: subpd %xmm6, %xmm1
+; CHECK-NEXT: addpd %xmm2, %xmm1
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX1-NEXT: vorps {{.*}}(%rip), %ymm1, %ymm1
+; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vorpd {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
+; AVX512-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX512-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
+; AVX512-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
+; AVX512-NEXT: vsubpd %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %result = call <4 x double>
+ @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %result
+}
+
+define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: js .LBB182_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: cvtsi2ss %rax, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: jns .LBB182_5
+; CHECK-NEXT: .LBB182_4:
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: cvtsi2ss %rax, %xmm3
+; CHECK-NEXT: addss %xmm3, %xmm3
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: jns .LBB182_8
+; CHECK-NEXT: .LBB182_7:
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: jmp .LBB182_9
+; CHECK-NEXT: .LBB182_1:
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: cvtsi2ss %rax, %xmm2
+; CHECK-NEXT: addss %xmm2, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: js .LBB182_4
+; CHECK-NEXT: .LBB182_5: # %entry
+; CHECK-NEXT: cvtsi2ss %rax, %xmm3
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: js .LBB182_7
+; CHECK-NEXT: .LBB182_8: # %entry
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: .LBB182_9: # %entry
+; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: js .LBB182_10
+; CHECK-NEXT: # %bb.11: # %entry
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB182_10:
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: js .LBB182_1
+; AVX1-NEXT: # %bb.2: # %entry
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: jns .LBB182_5
+; AVX1-NEXT: .LBB182_4:
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: orq %rcx, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: jmp .LBB182_6
+; AVX1-NEXT: .LBB182_1:
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: orq %rcx, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: js .LBB182_4
+; AVX1-NEXT: .LBB182_5: # %entry
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX1-NEXT: .LBB182_6: # %entry
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: js .LBB182_7
+; AVX1-NEXT: # %bb.8: # %entry
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: jns .LBB182_11
+; AVX1-NEXT: .LBB182_10:
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: orq %rcx, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+; AVX1-NEXT: .LBB182_7:
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: orq %rcx, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: js .LBB182_10
+; AVX1-NEXT: .LBB182_11: # %entry
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX512F-LABEL: constrained_vector_uitofp_v4f32_v4i64:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64:
+; AVX512DQ: # %bb.0: # %entry
+; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX512DQ-NEXT: vmovq %xmm0, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512DQ-NEXT: vmovq %xmm2, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
+; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpsrlq $1, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
+; AVX512DQ-NEXT: vmovq %xmm2, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
+; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2
+; AVX512DQ-NEXT: vmovq %xmm2, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
+; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
+; AVX512DQ-NEXT: vaddps %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpcmpgtq %ymm0, %ymm3, %ymm0
+; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512DQ-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+entry:
+ %result = call <4 x float>
+ @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x float> %result
+}
+
attributes #0 = { strictfp }
; Single width declarations
@@ -5822,6 +7329,14 @@ declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, met
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
; Scalar width declarations
declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
@@ -5857,6 +7372,14 @@ declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metad
declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64>, metadata, metadata)
; Illegal width declarations
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
@@ -5915,6 +7438,14 @@ declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, meta
declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64>, metadata, metadata)
; Double width declarations
declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
@@ -5950,3 +7481,12 @@ declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, met
declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
+
diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll
index 616897c3a002..b92408a1bf1c 100644
--- a/llvm/test/Feature/fp-intrinsics.ll
+++ b/llvm/test/Feature/fp-intrinsics.ll
@@ -373,6 +373,28 @@ entry:
ret i64 %result
}
+; Verify that sitofp(42) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: @f30
+; CHECK: call double @llvm.experimental.constrained.sitofp
+define double @f30() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 42,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
+; Verify that uitofp(42) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: @f31
+; CHECK: call double @llvm.experimental.constrained.uitofp
+define double @f31() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 42,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
attributes #0 = { strictfp }
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
@@ -405,3 +427,5 @@ declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)