[llvm] 6e20d70 - [LegalizeDAG] Convert strict fp nodes to libcalls without losing the chain.
Craig Topper via llvm-commits
llvm-commits@lists.llvm.org
Mon Nov 18 11:28:08 PST 2019
Author: Craig Topper
Date: 2019-11-18T11:24:08-08:00
New Revision: 6e20d70a695028099b1705cfcfd1373d9d62e747
URL: https://github.com/llvm/llvm-project/commit/6e20d70a695028099b1705cfcfd1373d9d62e747
DIFF: https://github.com/llvm/llvm-project/commit/6e20d70a695028099b1705cfcfd1373d9d62e747.diff
LOG: [LegalizeDAG] Convert strict fp nodes to libcalls without losing the chain.
Previously we mutated the node and then converted it to a libcall, but this lost the chain information.
This patch keeps the chain, but unfortunately breaks tail call optimization, since the functions that decide whether a node is in tail call position can't handle the chain. Correct ordering seems more important than preserving the tail call, though.
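In essence (a condensed sketch of the new ExpandFPLibCall tail, taken from the diff below rather than a complete function), the strict case now expands through ExpandChainLibCall and pushes both the value and the updated chain, instead of first mutating the node to its non-strict form:

  // Strict nodes keep their chain by going through ExpandChainLibCall.
  if (Node->isStrictFPOpcode()) {
    // FIXME: This doesn't support tail calls.
    std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
    Results.push_back(Tmp.first);  // the libcall's value result
    Results.push_back(Tmp.second); // the output chain, preserving ordering
  } else {
    // Non-strict nodes expand as before, with a single value result.
    Results.push_back(ExpandLibCall(LC, Node, false));
  }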
Somehow the SystemZ tests improved. I looked at one of them, and it seems we now handle the split vector elements in a different order, which makes the copies work better.
Differential Revision: https://reviews.llvm.org/D70334
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
llvm/test/CodeGen/X86/fp-intrinsics.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c42712c6aab6..4b00ec08c4bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -140,20 +140,22 @@ class SelectionDAGLegalize {
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
- SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
- RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128);
+ void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results);
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
- SDValue ExpandArgFPLibCall(SDNode *Node,
- RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
- RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128);
+ void ExpandArgFPLibCall(SDNode *Node,
+ RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -2117,15 +2119,13 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
return CallInfo;
}
-SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
- RTLIB::Libcall Call_F32,
- RTLIB::Libcall Call_F64,
- RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128) {
- if (Node->isStrictFPOpcode())
- Node = DAG.mutateStrictFPToFP(Node);
-
+void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results) {
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
@@ -2135,7 +2135,16 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
case MVT::f128: LC = Call_F128; break;
case MVT::ppcf128: LC = Call_PPCF128; break;
}
- return ExpandLibCall(LC, Node, false);
+
+ if (Node->isStrictFPOpcode()) {
+ // FIXME: This doesn't support tail calls.
+ std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ } else {
+ SDValue Tmp = ExpandLibCall(LC, Node, false);
+ Results.push_back(Tmp);
+ }
}
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
@@ -2158,17 +2167,17 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
/// Expand the node to a libcall based on first argument type (for instance
/// lround and its variant).
-SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
- RTLIB::Libcall Call_F32,
- RTLIB::Libcall Call_F64,
- RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128) {
- if (Node->isStrictFPOpcode())
- Node = DAG.mutateStrictFPToFP(Node);
+void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT InVT = Node->getOperand(Node->isStrictFPOpcode() ? 1 : 0).getValueType();
RTLIB::Libcall LC;
- switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) {
+ switch (InVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
@@ -2177,7 +2186,15 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
case MVT::ppcf128: LC = Call_PPCF128; break;
}
- return ExpandLibCall(LC, Node, false);
+ if (Node->isStrictFPOpcode()) {
+ // FIXME: This doesn't support tail calls.
+ std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ } else {
+ SDValue Tmp = ExpandLibCall(LC, Node, false);
+ Results.push_back(Tmp);
+ }
}
/// Issue libcalls to __{u}divmod to compute div / rem pairs.
@@ -3818,38 +3835,38 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
}
case ISD::FMINNUM:
case ISD::STRICT_FMINNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
- RTLIB::FMIN_F80, RTLIB::FMIN_F128,
- RTLIB::FMIN_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128, Results);
break;
case ISD::FMAXNUM:
case ISD::STRICT_FMAXNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
- RTLIB::FMAX_F80, RTLIB::FMAX_F128,
- RTLIB::FMAX_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128, Results);
break;
case ISD::FSQRT:
case ISD::STRICT_FSQRT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128, Results);
break;
case ISD::FCBRT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
- RTLIB::CBRT_F80, RTLIB::CBRT_F128,
- RTLIB::CBRT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128, Results);
break;
case ISD::FSIN:
case ISD::STRICT_FSIN:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128, Results);
break;
case ISD::FCOS:
case ISD::STRICT_FCOS:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_F128,
- RTLIB::COS_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128, Results);
break;
case ISD::FSINCOS:
// Expand into sincos libcall.
@@ -3858,107 +3875,107 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::FLOG:
case ISD::STRICT_FLOG:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
- RTLIB::LOG_FINITE_F64,
- RTLIB::LOG_FINITE_F80,
- RTLIB::LOG_FINITE_F128,
- RTLIB::LOG_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
+ RTLIB::LOG_FINITE_F64,
+ RTLIB::LOG_FINITE_F80,
+ RTLIB::LOG_FINITE_F128,
+ RTLIB::LOG_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128, Results);
break;
case ISD::FLOG2:
case ISD::STRICT_FLOG2:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
- RTLIB::LOG2_FINITE_F64,
- RTLIB::LOG2_FINITE_F80,
- RTLIB::LOG2_FINITE_F128,
- RTLIB::LOG2_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
+ RTLIB::LOG2_FINITE_F64,
+ RTLIB::LOG2_FINITE_F80,
+ RTLIB::LOG2_FINITE_F128,
+ RTLIB::LOG2_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128, Results);
break;
case ISD::FLOG10:
case ISD::STRICT_FLOG10:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
- RTLIB::LOG10_FINITE_F64,
- RTLIB::LOG10_FINITE_F80,
- RTLIB::LOG10_FINITE_F128,
- RTLIB::LOG10_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
+ RTLIB::LOG10_FINITE_F64,
+ RTLIB::LOG10_FINITE_F80,
+ RTLIB::LOG10_FINITE_F128,
+ RTLIB::LOG10_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128, Results);
break;
case ISD::FEXP:
case ISD::STRICT_FEXP:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
- RTLIB::EXP_FINITE_F64,
- RTLIB::EXP_FINITE_F80,
- RTLIB::EXP_FINITE_F128,
- RTLIB::EXP_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
+ RTLIB::EXP_FINITE_F64,
+ RTLIB::EXP_FINITE_F80,
+ RTLIB::EXP_FINITE_F128,
+ RTLIB::EXP_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128, Results);
break;
case ISD::FEXP2:
case ISD::STRICT_FEXP2:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
- RTLIB::EXP2_FINITE_F64,
- RTLIB::EXP2_FINITE_F80,
- RTLIB::EXP2_FINITE_F128,
- RTLIB::EXP2_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
+ RTLIB::EXP2_FINITE_F64,
+ RTLIB::EXP2_FINITE_F80,
+ RTLIB::EXP2_FINITE_F128,
+ RTLIB::EXP2_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128, Results);
break;
case ISD::FTRUNC:
case ISD::STRICT_FTRUNC:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
- RTLIB::TRUNC_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128, Results);
break;
case ISD::FFLOOR:
case ISD::STRICT_FFLOOR:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
- RTLIB::FLOOR_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128, Results);
break;
case ISD::FCEIL:
case ISD::STRICT_FCEIL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_F128,
- RTLIB::CEIL_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128, Results);
break;
case ISD::FRINT:
case ISD::STRICT_FRINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_F128,
- RTLIB::RINT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128, Results);
break;
case ISD::FNEARBYINT:
case ISD::STRICT_FNEARBYINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_F128,
- RTLIB::NEARBYINT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128, Results);
break;
case ISD::FROUND:
case ISD::STRICT_FROUND:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
- RTLIB::ROUND_F64,
- RTLIB::ROUND_F80,
- RTLIB::ROUND_F128,
- RTLIB::ROUND_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128, Results);
break;
case ISD::FPOWI:
case ISD::STRICT_FPOWI: {
@@ -3981,78 +3998,78 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Exponent));
break;
}
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128, Results);
break;
}
case ISD::FPOW:
case ISD::STRICT_FPOW:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
- RTLIB::POW_FINITE_F64,
- RTLIB::POW_FINITE_F80,
- RTLIB::POW_FINITE_F128,
- RTLIB::POW_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
+ RTLIB::POW_FINITE_F64,
+ RTLIB::POW_FINITE_F80,
+ RTLIB::POW_FINITE_F128,
+ RTLIB::POW_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128, Results);
break;
case ISD::LROUND:
case ISD::STRICT_LROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
- RTLIB::LROUND_F64, RTLIB::LROUND_F80,
- RTLIB::LROUND_F128,
- RTLIB::LROUND_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128, Results);
break;
case ISD::LLROUND:
case ISD::STRICT_LLROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
- RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
- RTLIB::LLROUND_F128,
- RTLIB::LLROUND_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128, Results);
break;
case ISD::LRINT:
case ISD::STRICT_LRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
- RTLIB::LRINT_F64, RTLIB::LRINT_F80,
- RTLIB::LRINT_F128,
- RTLIB::LRINT_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128, Results);
break;
case ISD::LLRINT:
case ISD::STRICT_LLRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
- RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
- RTLIB::LLRINT_F128,
- RTLIB::LLRINT_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128, Results);
break;
case ISD::FDIV:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
- RTLIB::DIV_F80, RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128, Results);
break;
case ISD::FREM:
case ISD::STRICT_FREM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
- RTLIB::REM_F80, RTLIB::REM_F128,
- RTLIB::REM_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128, Results);
break;
case ISD::FMA:
case ISD::STRICT_FMA:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
- RTLIB::FMA_F80, RTLIB::FMA_F128,
- RTLIB::FMA_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128, Results);
break;
case ISD::FADD:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128, Results);
break;
case ISD::FMUL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
- RTLIB::MUL_F80, RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128, Results);
break;
case ISD::FP16_TO_FP:
if (Node->getValueType(0) == MVT::f32) {
@@ -4067,9 +4084,9 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
}
case ISD::FSUB:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
- RTLIB::SUB_F80, RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128, Results);
break;
case ISD::SREM:
Results.push_back(ExpandIntLibCall(Node, true,
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index 64097eea38ff..6c53ffc785d3 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -244,8 +244,7 @@ define <2 x double> @constrained_vector_frem_v2f64() #0 {
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: brasl %r14, fmod@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f9
+; S390X-NEXT: ldr %f2, %f9
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -317,9 +316,8 @@ define <3 x float> @constrained_vector_frem_v3f32() #0 {
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: ler %f2, %f8
; S390X-NEXT: brasl %r14, fmodf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f9
; S390X-NEXT: ler %f2, %f10
+; S390X-NEXT: ler %f4, %f9
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -501,10 +499,9 @@ define <4 x double> @constrained_vector_frem_v4f64() #0 {
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: brasl %r14, fmod@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f9
-; S390X-NEXT: ldr %f2, %f10
-; S390X-NEXT: ldr %f4, %f11
+; S390X-NEXT: ldr %f2, %f11
+; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f6, %f9
; S390X-NEXT: ld %f8, 184(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 168(%r15) # 8-byte Folded Reload
@@ -1288,8 +1285,7 @@ define <2 x double> @constrained_vector_pow_v2f64() #0 {
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: brasl %r14, pow@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f9
+; S390X-NEXT: ldr %f2, %f9
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -1363,9 +1359,8 @@ define <3 x float> @constrained_vector_pow_v3f32() #0 {
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: ler %f2, %f8
; S390X-NEXT: brasl %r14, powf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f9
; S390X-NEXT: ler %f2, %f10
+; S390X-NEXT: ler %f4, %f9
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -1553,10 +1548,9 @@ define <4 x double> @constrained_vector_pow_v4f64() #0 {
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: brasl %r14, pow@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f9
-; S390X-NEXT: ldr %f2, %f10
-; S390X-NEXT: ldr %f4, %f11
+; S390X-NEXT: ldr %f2, %f11
+; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f6, %f9
; S390X-NEXT: ld %f8, 184(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 168(%r15) # 8-byte Folded Reload
@@ -1676,8 +1670,7 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 {
; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -1741,9 +1734,8 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 {
; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, __powisf2@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -1803,12 +1795,12 @@ define void @constrained_vector_powi_v3f64(<3 x double>* %a) #0 {
; S390X-NEXT: .cfi_offset %f8, -168
; S390X-NEXT: .cfi_offset %f9, -176
; S390X-NEXT: larl %r1, .LCPI38_0
-; S390X-NEXT: ld %f0, 0(%r1)
+; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: lgr %r13, %r2
; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI38_1
-; S390X-NEXT: ldeb %f1, 0(%r1)
+; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f0, %f1
@@ -1819,9 +1811,9 @@ define void @constrained_vector_powi_v3f64(<3 x double>* %a) #0 {
; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
-; S390X-NEXT: std %f0, 8(%r13)
-; S390X-NEXT: std %f9, 0(%r13)
-; S390X-NEXT: std %f8, 16(%r13)
+; S390X-NEXT: std %f0, 16(%r13)
+; S390X-NEXT: std %f9, 8(%r13)
+; S390X-NEXT: std %f8, 0(%r13)
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r13, %r15, 280(%r15)
@@ -1906,10 +1898,9 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 {
; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -2010,15 +2001,14 @@ define <2 x double> @constrained_vector_sin_v2f64() #0 {
; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
; S390X-NEXT: .cfi_offset %f8, -168
; S390X-NEXT: larl %r1, .LCPI41_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, sin@PLT
; S390X-NEXT: larl %r1, .LCPI41_1
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, sin@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -2076,9 +2066,8 @@ define <3 x float> @constrained_vector_sin_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, sinf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -2214,7 +2203,7 @@ define <4 x double> @constrained_vector_sin_v4f64() #0 {
; S390X-NEXT: .cfi_offset %f9, -176
; S390X-NEXT: .cfi_offset %f10, -184
; S390X-NEXT: larl %r1, .LCPI44_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, sin@PLT
; S390X-NEXT: larl %r1, .LCPI44_1
; S390X-NEXT: ld %f1, 0(%r1)
@@ -2227,14 +2216,13 @@ define <4 x double> @constrained_vector_sin_v4f64() #0 {
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, sin@PLT
; S390X-NEXT: larl %r1, .LCPI44_3
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, sin@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -2330,15 +2318,14 @@ define <2 x double> @constrained_vector_cos_v2f64() #0 {
; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
; S390X-NEXT: .cfi_offset %f8, -168
; S390X-NEXT: larl %r1, .LCPI46_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, cos@PLT
; S390X-NEXT: larl %r1, .LCPI46_1
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, cos@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -2396,9 +2383,8 @@ define <3 x float> @constrained_vector_cos_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, cosf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -2534,7 +2520,7 @@ define <4 x double> @constrained_vector_cos_v4f64() #0 {
; S390X-NEXT: .cfi_offset %f9, -176
; S390X-NEXT: .cfi_offset %f10, -184
; S390X-NEXT: larl %r1, .LCPI49_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, cos@PLT
; S390X-NEXT: larl %r1, .LCPI49_1
; S390X-NEXT: ld %f1, 0(%r1)
@@ -2547,14 +2533,13 @@ define <4 x double> @constrained_vector_cos_v4f64() #0 {
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, cos@PLT
; S390X-NEXT: larl %r1, .LCPI49_3
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, cos@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -2650,15 +2635,14 @@ define <2 x double> @constrained_vector_exp_v2f64() #0 {
; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
; S390X-NEXT: .cfi_offset %f8, -168
; S390X-NEXT: larl %r1, .LCPI51_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, exp@PLT
; S390X-NEXT: larl %r1, .LCPI51_1
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, exp@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -2716,9 +2700,8 @@ define <3 x float> @constrained_vector_exp_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, expf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -2854,7 +2837,7 @@ define <4 x double> @constrained_vector_exp_v4f64() #0 {
; S390X-NEXT: .cfi_offset %f9, -176
; S390X-NEXT: .cfi_offset %f10, -184
; S390X-NEXT: larl %r1, .LCPI54_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, exp@PLT
; S390X-NEXT: larl %r1, .LCPI54_1
; S390X-NEXT: ld %f1, 0(%r1)
@@ -2867,14 +2850,13 @@ define <4 x double> @constrained_vector_exp_v4f64() #0 {
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, exp@PLT
; S390X-NEXT: larl %r1, .LCPI54_3
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, exp@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -2970,15 +2952,14 @@ define <2 x double> @constrained_vector_exp2_v2f64() #0 {
; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
; S390X-NEXT: .cfi_offset %f8, -168
; S390X-NEXT: larl %r1, .LCPI56_0
-; S390X-NEXT: ld %f0, 0(%r1)
+; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: brasl %r14, exp2@PLT
; S390X-NEXT: larl %r1, .LCPI56_1
-; S390X-NEXT: ldeb %f1, 0(%r1)
+; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, exp2@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -3036,9 +3017,8 @@ define <3 x float> @constrained_vector_exp2_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, exp2f@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -3191,10 +3171,9 @@ define <4 x double> @constrained_vector_exp2_v4f64() #0 {
; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, exp2@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -3290,15 +3269,14 @@ define <2 x double> @constrained_vector_log_v2f64() #0 {
; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
; S390X-NEXT: .cfi_offset %f8, -168
; S390X-NEXT: larl %r1, .LCPI61_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, log@PLT
; S390X-NEXT: larl %r1, .LCPI61_1
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, log@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -3356,9 +3334,8 @@ define <3 x float> @constrained_vector_log_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, logf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -3494,7 +3471,7 @@ define <4 x double> @constrained_vector_log_v4f64() #0 {
; S390X-NEXT: .cfi_offset %f9, -176
; S390X-NEXT: .cfi_offset %f10, -184
; S390X-NEXT: larl %r1, .LCPI64_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, log@PLT
; S390X-NEXT: larl %r1, .LCPI64_1
; S390X-NEXT: ld %f1, 0(%r1)
@@ -3507,14 +3484,13 @@ define <4 x double> @constrained_vector_log_v4f64() #0 {
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, log@PLT
; S390X-NEXT: larl %r1, .LCPI64_3
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, log@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -3610,15 +3586,14 @@ define <2 x double> @constrained_vector_log10_v2f64() #0 {
; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
; S390X-NEXT: .cfi_offset %f8, -168
; S390X-NEXT: larl %r1, .LCPI66_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, log10@PLT
; S390X-NEXT: larl %r1, .LCPI66_1
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, log10@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -3676,9 +3651,8 @@ define <3 x float> @constrained_vector_log10_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, log10f@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -3814,7 +3788,7 @@ define <4 x double> @constrained_vector_log10_v4f64() #0 {
; S390X-NEXT: .cfi_offset %f9, -176
; S390X-NEXT: .cfi_offset %f10, -184
; S390X-NEXT: larl %r1, .LCPI69_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, log10@PLT
; S390X-NEXT: larl %r1, .LCPI69_1
; S390X-NEXT: ld %f1, 0(%r1)
@@ -3827,14 +3801,13 @@ define <4 x double> @constrained_vector_log10_v4f64() #0 {
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, log10@PLT
; S390X-NEXT: larl %r1, .LCPI69_3
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, log10@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -3930,15 +3903,14 @@ define <2 x double> @constrained_vector_log2_v2f64() #0 {
; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
; S390X-NEXT: .cfi_offset %f8, -168
; S390X-NEXT: larl %r1, .LCPI71_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, log2@PLT
; S390X-NEXT: larl %r1, .LCPI71_1
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, log2@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -3996,9 +3968,8 @@ define <3 x float> @constrained_vector_log2_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, log2f@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -4134,7 +4105,7 @@ define <4 x double> @constrained_vector_log2_v4f64() #0 {
; S390X-NEXT: .cfi_offset %f9, -176
; S390X-NEXT: .cfi_offset %f10, -184
; S390X-NEXT: larl %r1, .LCPI74_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: brasl %r14, log2@PLT
; S390X-NEXT: larl %r1, .LCPI74_1
; S390X-NEXT: ld %f1, 0(%r1)
@@ -4147,14 +4118,13 @@ define <4 x double> @constrained_vector_log2_v4f64() #0 {
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, log2@PLT
; S390X-NEXT: larl %r1, .LCPI74_3
-; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, log2@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -4396,15 +4366,14 @@ define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
; S390X-NEXT: .cfi_offset %f8, -168
; S390X-NEXT: larl %r1, .LCPI81_0
-; S390X-NEXT: ld %f0, 0(%r1)
+; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: brasl %r14, nearbyint@PLT
; S390X-NEXT: larl %r1, .LCPI81_1
-; S390X-NEXT: ldeb %f1, 0(%r1)
+; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, nearbyint@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -4448,9 +4417,8 @@ define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, nearbyintf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -4565,10 +4533,9 @@ define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, nearbyint@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -4655,8 +4622,7 @@ define <2 x double> @constrained_vector_maxnum_v2f64() #0 {
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, fmax@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -4709,10 +4675,10 @@ define <3 x float> @constrained_vector_maxnum_v3f32() #0 {
; S390X-NEXT: .cfi_offset %f9, -176
; S390X-NEXT: .cfi_offset %f10, -184
; S390X-NEXT: larl %r1, .LCPI87_0
-; S390X-NEXT: le %f8, 0(%r1)
+; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI87_1
-; S390X-NEXT: le %f2, 0(%r1)
-; S390X-NEXT: ler %f0, %f8
+; S390X-NEXT: le %f8, 0(%r1)
+; S390X-NEXT: ler %f2, %f8
; S390X-NEXT: brasl %r14, fmaxf@PLT
; S390X-NEXT: larl %r1, .LCPI87_2
; S390X-NEXT: le %f1, 0(%r1)
@@ -4722,14 +4688,12 @@ define <3 x float> @constrained_vector_maxnum_v3f32() #0 {
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, fmaxf@PLT
; S390X-NEXT: larl %r1, .LCPI87_4
-; S390X-NEXT: le %f1, 0(%r1)
+; S390X-NEXT: le %f2, 0(%r1)
; S390X-NEXT: ler %f10, %f0
-; S390X-NEXT: ler %f0, %f1
-; S390X-NEXT: ler %f2, %f8
+; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: brasl %r14, fmaxf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f9
; S390X-NEXT: ler %f2, %f10
+; S390X-NEXT: ler %f4, %f9
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -4914,10 +4878,9 @@ define <4 x double> @constrained_vector_maxnum_v4f64() #0 {
; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, fmax@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -5038,8 +5001,7 @@ define <2 x double> @constrained_vector_minnum_v2f64() #0 {
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, fmin@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -5092,10 +5054,10 @@ define <3 x float> @constrained_vector_minnum_v3f32() #0 {
; S390X-NEXT: .cfi_offset %f9, -176
; S390X-NEXT: .cfi_offset %f10, -184
; S390X-NEXT: larl %r1, .LCPI92_0
-; S390X-NEXT: le %f8, 0(%r1)
+; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI92_1
-; S390X-NEXT: le %f2, 0(%r1)
-; S390X-NEXT: ler %f0, %f8
+; S390X-NEXT: le %f8, 0(%r1)
+; S390X-NEXT: ler %f2, %f8
; S390X-NEXT: brasl %r14, fminf@PLT
; S390X-NEXT: larl %r1, .LCPI92_2
; S390X-NEXT: le %f1, 0(%r1)
@@ -5105,14 +5067,12 @@ define <3 x float> @constrained_vector_minnum_v3f32() #0 {
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, fminf@PLT
; S390X-NEXT: larl %r1, .LCPI92_4
-; S390X-NEXT: le %f1, 0(%r1)
+; S390X-NEXT: le %f2, 0(%r1)
; S390X-NEXT: ler %f10, %f0
-; S390X-NEXT: ler %f0, %f1
-; S390X-NEXT: ler %f2, %f8
+; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: brasl %r14, fminf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f9
; S390X-NEXT: ler %f2, %f10
+; S390X-NEXT: ler %f4, %f9
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -5301,10 +5261,9 @@ define <4 x double> @constrained_vector_minnum_v4f64() #0 {
; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, fmin@PLT
-; S390X-NEXT: ldr %f6, %f0
-; S390X-NEXT: ldr %f0, %f8
-; S390X-NEXT: ldr %f2, %f9
-; S390X-NEXT: ldr %f4, %f10
+; S390X-NEXT: ldr %f2, %f10
+; S390X-NEXT: ldr %f4, %f9
+; S390X-NEXT: ldr %f6, %f8
; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload
@@ -5661,8 +5620,7 @@ define <2 x double> @constrained_vector_ceil_v2f64() #0 {
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, ceil@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -5706,9 +5664,8 @@ define <3 x float> @constrained_vector_ceil_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, ceilf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -5837,8 +5794,7 @@ define <2 x double> @constrained_vector_floor_v2f64() #0 {
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, floor@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -5882,9 +5838,8 @@ define <3 x float> @constrained_vector_floor_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, floorf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -6012,8 +5967,7 @@ define <2 x double> @constrained_vector_round_v2f64() #0 {
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, round@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -6057,9 +6011,8 @@ define <3 x float> @constrained_vector_round_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, roundf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
@@ -6188,8 +6141,7 @@ define <2 x double> @constrained_vector_trunc_v2f64() #0 {
; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, trunc@PLT
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: ldr %f0, %f8
+; S390X-NEXT: ldr %f2, %f8
; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 280(%r15)
; S390X-NEXT: br %r14
@@ -6233,9 +6185,8 @@ define <3 x float> @constrained_vector_trunc_v3f32() #0 {
; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, truncf@PLT
-; S390X-NEXT: ler %f4, %f0
-; S390X-NEXT: ler %f0, %f8
; S390X-NEXT: ler %f2, %f9
+; S390X-NEXT: ler %f4, %f8
; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
; S390X-NEXT: lmg %r14, %r15, 288(%r15)
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll b/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
index f881718145cd..101c47fd0ca3 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
@@ -8,10 +8,15 @@
define float @f17() #0 {
; NOFMA-LABEL: f17:
; NOFMA: # %bb.0: # %entry
+; NOFMA-NEXT: pushq %rax
+; NOFMA-NEXT: .cfi_def_cfa_offset 16
; NOFMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOFMA-NEXT: movaps %xmm0, %xmm1
; NOFMA-NEXT: movaps %xmm0, %xmm2
-; NOFMA-NEXT: jmp fmaf # TAILCALL
+; NOFMA-NEXT: callq fmaf
+; NOFMA-NEXT: popq %rax
+; NOFMA-NEXT: .cfi_def_cfa_offset 8
+; NOFMA-NEXT: retq
;
; FMA-LABEL: f17:
; FMA: # %bb.0: # %entry
@@ -33,10 +38,15 @@ entry:
define double @f18() #0 {
; NOFMA-LABEL: f18:
; NOFMA: # %bb.0: # %entry
+; NOFMA-NEXT: pushq %rax
+; NOFMA-NEXT: .cfi_def_cfa_offset 16
; NOFMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; NOFMA-NEXT: movaps %xmm0, %xmm1
; NOFMA-NEXT: movaps %xmm0, %xmm2
-; NOFMA-NEXT: jmp fma # TAILCALL
+; NOFMA-NEXT: callq fma
+; NOFMA-NEXT: popq %rax
+; NOFMA-NEXT: .cfi_def_cfa_offset 8
+; NOFMA-NEXT: retq
;
; FMA-LABEL: f18:
; FMA: # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index 07655e80665e..9841b9fc1054 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -257,15 +257,25 @@ define double @f6() #0 {
;
; SSE-LABEL: f6:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: jmp pow # TAILCALL
+; SSE-NEXT: callq pow
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f6:
; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: jmp pow # TAILCALL
+; AVX-NEXT: callq pow
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.pow.f64(double 42.1,
double 3.0,
@@ -290,15 +300,25 @@ define double @f7() #0 {
;
; SSE-LABEL: f7:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movl $3, %edi
-; SSE-NEXT: jmp __powidf2 # TAILCALL
+; SSE-NEXT: callq __powidf2
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f7:
; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
-; AVX-NEXT: jmp __powidf2 # TAILCALL
+; AVX-NEXT: callq __powidf2
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.powi.f64(double 42.1,
i32 3,
@@ -322,13 +342,23 @@ define double @f8() #0 {
;
; SSE-LABEL: f8:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: jmp sin # TAILCALL
+; SSE-NEXT: callq sin
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f8:
; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: jmp sin # TAILCALL
+; AVX-NEXT: callq sin
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sin.f64(double 42.0,
metadata !"round.dynamic",
@@ -351,13 +381,23 @@ define double @f9() #0 {
;
; SSE-LABEL: f9:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: jmp cos # TAILCALL
+; SSE-NEXT: callq cos
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f9:
; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: jmp cos # TAILCALL
+; AVX-NEXT: callq cos
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.cos.f64(double 42.0,
metadata !"round.dynamic",
@@ -380,13 +420,23 @@ define double @f10() #0 {
;
; SSE-LABEL: f10:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: jmp exp # TAILCALL
+; SSE-NEXT: callq exp
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f10:
; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: jmp exp # TAILCALL
+; AVX-NEXT: callq exp
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.exp.f64(double 42.0,
metadata !"round.dynamic",
@@ -409,13 +459,23 @@ define double @f11() #0 {
;
; SSE-LABEL: f11:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: jmp exp2 # TAILCALL
+; SSE-NEXT: callq exp2
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f11:
; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: jmp exp2 # TAILCALL
+; AVX-NEXT: callq exp2
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.exp2.f64(double 42.1,
metadata !"round.dynamic",
@@ -438,13 +498,23 @@ define double @f12() #0 {
;
; SSE-LABEL: f12:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: jmp log # TAILCALL
+; SSE-NEXT: callq log
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f12:
; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: jmp log # TAILCALL
+; AVX-NEXT: callq log
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.log.f64(double 42.0,
metadata !"round.dynamic",
@@ -467,13 +537,23 @@ define double @f13() #0 {
;
; SSE-LABEL: f13:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: jmp log10 # TAILCALL
+; SSE-NEXT: callq log10
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f13:
; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: jmp log10 # TAILCALL
+; AVX-NEXT: callq log10
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.log10.f64(double 42.0,
metadata !"round.dynamic",
@@ -496,13 +576,23 @@ define double @f14() #0 {
;
; SSE-LABEL: f14:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: jmp log2 # TAILCALL
+; SSE-NEXT: callq log2
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f14:
; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: jmp log2 # TAILCALL
+; AVX-NEXT: callq log2
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.log2.f64(double 42.0,
metadata !"round.dynamic",
@@ -525,8 +615,13 @@ define double @f15() #0 {
;
; SSE-LABEL: f15:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: jmp rint # TAILCALL
+; SSE-NEXT: callq rint
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f15:
; AVX: # %bb.0: # %entry
@@ -556,8 +651,13 @@ define double @f16() #0 {
;
; SSE-LABEL: f16:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: jmp nearbyint # TAILCALL
+; SSE-NEXT: callq nearbyint
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f16:
; AVX: # %bb.0: # %entry
@@ -588,15 +688,25 @@ define double @f19() #0 {
;
; SSE-LABEL: f19:
; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: jmp fmod # TAILCALL
+; SSE-NEXT: callq fmod
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f19:
; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: jmp fmod # TAILCALL
+; AVX-NEXT: callq fmod
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%rem = call double @llvm.experimental.constrained.frem.f64(
double 1.000000e+00,
@@ -768,9 +878,34 @@ entry:
}
define i32 @f23(double %x) #0 {
-; COMMON-LABEL: f23:
-; COMMON: # %bb.0: # %entry
-; COMMON-NEXT: jmp lrint # TAILCALL
+; X86-SSE-LABEL: f23:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: calll lrint
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: f23:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: callq lrint
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
+;
+; AVX-LABEL: f23:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: callq lrint
+; AVX-NEXT: popq %rcx
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x,
metadata !"round.dynamic",
@@ -779,9 +914,34 @@ entry:
}
define i32 @f24(float %x) #0 {
-; COMMON-LABEL: f24:
-; COMMON: # %bb.0: # %entry
-; COMMON-NEXT: jmp lrintf # TAILCALL
+; X86-SSE-LABEL: f24:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: calll lrintf
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: f24:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: callq lrintf
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
+;
+; AVX-LABEL: f24:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: callq lrintf
+; AVX-NEXT: popq %rcx
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x,
metadata !"round.dynamic",
@@ -803,11 +963,21 @@ define i64 @f25(double %x) #0 {
;
; SSE-LABEL: f25:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: jmp llrint # TAILCALL
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: callq llrint
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f25:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: jmp llrint # TAILCALL
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: callq llrint
+; AVX-NEXT: popq %rcx
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x,
metadata !"round.dynamic",
@@ -829,11 +999,21 @@ define i64 @f26(float %x) {
;
; SSE-LABEL: f26:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: jmp llrintf # TAILCALL
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: callq llrintf
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f26:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: jmp llrintf # TAILCALL
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: callq llrintf
+; AVX-NEXT: popq %rcx
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x,
metadata !"round.dynamic",
@@ -842,9 +1022,34 @@ entry:
}
define i32 @f27(double %x) #0 {
-; COMMON-LABEL: f27:
-; COMMON: # %bb.0: # %entry
-; COMMON-NEXT: jmp lround # TAILCALL
+; X86-SSE-LABEL: f27:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: calll lround
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: f27:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: callq lround
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
+;
+; AVX-LABEL: f27:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: callq lround
+; AVX-NEXT: popq %rcx
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x,
metadata !"fpexcept.strict") #0
@@ -852,9 +1057,34 @@ entry:
}
define i32 @f28(float %x) #0 {
-; COMMON-LABEL: f28:
-; COMMON: # %bb.0: # %entry
-; COMMON-NEXT: jmp lroundf # TAILCALL
+; X86-SSE-LABEL: f28:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: calll lroundf
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: f28:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: callq lroundf
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
+;
+; AVX-LABEL: f28:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: callq lroundf
+; AVX-NEXT: popq %rcx
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x,
metadata !"fpexcept.strict") #0
@@ -875,11 +1105,21 @@ define i64 @f29(double %x) #0 {
;
; SSE-LABEL: f29:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: jmp llround # TAILCALL
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: callq llround
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f29:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: jmp llround # TAILCALL
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: callq llround
+; AVX-NEXT: popq %rcx
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x,
metadata !"fpexcept.strict") #0
@@ -900,11 +1140,21 @@ define i64 @f30(float %x) #0 {
;
; SSE-LABEL: f30:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: jmp llroundf # TAILCALL
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: callq llroundf
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
;
; AVX-LABEL: f30:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: jmp llroundf # TAILCALL
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: callq llroundf
+; AVX-NEXT: popq %rcx
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
entry:
%result = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x,
metadata !"fpexcept.strict") #0
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 2e01b10b3205..6850410e0747 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -292,9 +292,9 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 {
; CHECK-NEXT: callq fmod
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -1102,9 +1102,9 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 {
; CHECK-NEXT: callq pow
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -1358,9 +1358,9 @@ define <3 x double> @constrained_vector_powi_v3f64() #0 {
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -1595,9 +1595,9 @@ define <3 x double> @constrained_vector_sin_v3f64() #0 {
; CHECK-NEXT: callq sin
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -1819,9 +1819,9 @@ define <3 x double> @constrained_vector_cos_v3f64() #0 {
; CHECK-NEXT: callq cos
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -2043,9 +2043,9 @@ define <3 x double> @constrained_vector_exp_v3f64() #0 {
; CHECK-NEXT: callq exp
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -2267,9 +2267,9 @@ define <3 x double> @constrained_vector_exp2_v3f64() #0 {
; CHECK-NEXT: callq exp2
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -2491,9 +2491,9 @@ define <3 x double> @constrained_vector_log_v3f64() #0 {
; CHECK-NEXT: callq log
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -2715,9 +2715,9 @@ define <3 x double> @constrained_vector_log10_v3f64() #0 {
; CHECK-NEXT: callq log10
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -2939,9 +2939,9 @@ define <3 x double> @constrained_vector_log2_v3f64() #0 {
; CHECK-NEXT: callq log2
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -3141,9 +3141,9 @@ define <3 x double> @constrained_vector_rint_v3f64() #0 {
; CHECK-NEXT: callq rint
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -3311,9 +3311,9 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -3520,9 +3520,9 @@ define <3 x double> @constrained_vector_max_v3f64() #0 {
; CHECK-NEXT: callq fmax
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -3775,9 +3775,9 @@ define <3 x double> @constrained_vector_min_v3f64() #0 {
; CHECK-NEXT: callq fmin
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -5160,9 +5160,9 @@ define <3 x double> @constrained_vector_ceil_v3f64() #0 {
; CHECK-NEXT: callq ceil
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -5292,9 +5292,9 @@ define <3 x double> @constrained_vector_floor_v3f64() #0 {
; CHECK-NEXT: callq floor
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -5446,9 +5446,9 @@ define <3 x double> @constrained_vector_round_v3f64() #0 {
; CHECK-NEXT: callq round
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -5590,9 +5590,9 @@ define <3 x double> @constrained_vector_trunc_v3f64() #0 {
; CHECK-NEXT: callq trunc
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8