[llvm] r345650 - [FPEnv] [FPEnv] Add constrained intrinsics for MAXNUM and MINNUM
Cameron McInally via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 30 14:01:29 PDT 2018
Author: mcinally
Date: Tue Oct 30 14:01:29 2018
New Revision: 345650
URL: http://llvm.org/viewvc/llvm-project?rev=345650&view=rev
Log:
[FPEnv] [FPEnv] Add constrained intrinsics for MAXNUM and MINNUM
Differential Revision: https://reviews.llvm.org/D53216
Modified:
llvm/trunk/docs/LangRef.rst
llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
llvm/trunk/include/llvm/CodeGen/TargetLowering.h
llvm/trunk/include/llvm/IR/IntrinsicInst.h
llvm/trunk/include/llvm/IR/Intrinsics.td
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/trunk/lib/IR/Verifier.cpp
llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Modified: llvm/trunk/docs/LangRef.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/LangRef.rst?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/docs/LangRef.rst (original)
+++ llvm/trunk/docs/LangRef.rst Tue Oct 30 14:01:29 2018
@@ -14485,6 +14485,80 @@ mode is determined by the runtime floati
mode argument is only intended as information to the compiler.
+'``llvm.experimental.constrained.maxnum``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare <type>
+ @llvm.experimental.constrained.maxnum(<type> <op1>, <type> <op2>
+ metadata <rounding mode>,
+ metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.maxnum``' intrinsic returns the maximum
+of the two arguments.
+
+Arguments:
+""""""""""
+
+The first two arguments and the return value are floating-point numbers
+of the same type.
+
+The third and forth arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function follows the IEEE-754 semantics for maxNum. The rounding mode is
+described, not determined, by the rounding mode argument. The actual rounding
+mode is determined by the runtime floating-point environment. The rounding
+mode argument is only intended as information to the compiler.
+
+
+'``llvm.experimental.constrained.minnum``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare <type>
+ @llvm.experimental.constrained.minnum(<type> <op1>, <type> <op2>
+ metadata <rounding mode>,
+ metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.minnum``' intrinsic returns the minimum
+of the two arguments.
+
+Arguments:
+""""""""""
+
+The first two arguments and the return value are floating-point numbers
+of the same type.
+
+The third and forth arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function follows the IEEE-754 semantics for minNum. The rounding mode is
+described, not determined, by the rounding mode argument. The actual rounding
+mode is determined by the runtime floating-point environment. The rounding
+mode argument is only intended as information to the compiler.
+
+
General Intrinsics
------------------
Modified: llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h Tue Oct 30 14:01:29 2018
@@ -288,7 +288,7 @@ namespace ISD {
/// They are used to limit optimizations while the DAG is being optimized.
STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS,
STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2,
- STRICT_FRINT, STRICT_FNEARBYINT,
+ STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Tue Oct 30 14:01:29 2018
@@ -672,6 +672,8 @@ public:
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
return true;
}
}
Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Tue Oct 30 14:01:29 2018
@@ -819,6 +819,8 @@ public:
case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
+ case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
+ case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
}
auto Action = getOperationAction(EqOpc, VT);
Modified: llvm/trunk/include/llvm/IR/IntrinsicInst.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicInst.h?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicInst.h (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicInst.h Tue Oct 30 14:01:29 2018
@@ -251,6 +251,8 @@ namespace llvm {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
return true;
default: return false;
}
Modified: llvm/trunk/include/llvm/IR/Intrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Intrinsics.td?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/Intrinsics.td (original)
+++ llvm/trunk/include/llvm/IR/Intrinsics.td Tue Oct 30 14:01:29 2018
@@ -565,6 +565,16 @@ let IntrProperties = [IntrInaccessibleMe
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_minnum : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
}
// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
// FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round?
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Tue Oct 30 14:01:29 2018
@@ -1107,6 +1107,8 @@ void SelectionDAGLegalize::LegalizeOp(SD
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -3833,11 +3835,13 @@ void SelectionDAGLegalize::ConvertNodeTo
break;
}
case ISD::FMINNUM:
+ case ISD::STRICT_FMINNUM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
RTLIB::FMIN_F80, RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128));
break;
case ISD::FMAXNUM:
+ case ISD::STRICT_FMAXNUM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
RTLIB::FMAX_F80, RTLIB::FMAX_F128,
RTLIB::FMAX_PPCF128));
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Tue Oct 30 14:01:29 2018
@@ -305,6 +305,8 @@ SDValue VectorLegalizer::LegalizeOp(SDVa
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -751,6 +753,8 @@ SDValue VectorLegalizer::Expand(SDValue
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
return ExpandStrictFPOp(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Tue Oct 30 14:01:29 2018
@@ -164,6 +164,8 @@ void DAGTypeLegalizer::ScalarizeVectorRe
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
R = ScalarizeVecRes_StrictFPOp(N);
break;
}
@@ -834,6 +836,8 @@ void DAGTypeLegalizer::SplitVectorResult
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
}
@@ -2400,6 +2404,8 @@ void DAGTypeLegalizer::WidenVectorResult
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
Res = WidenVecRes_StrictFP(N);
break;
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Tue Oct 30 14:01:29 2018
@@ -7404,6 +7404,8 @@ SDNode* SelectionDAG::mutateStrictFPToFP
NewOpc = ISD::FNEARBYINT;
IsUnary = true;
break;
+ case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
+ case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
}
// We're taking this node out of the chain, so we need to re-link things.
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Oct 30 14:01:29 2018
@@ -5627,6 +5627,8 @@ SelectionDAGBuilder::visitIntrinsicCall(
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return nullptr;
case Intrinsic::fmuladd: {
@@ -6374,6 +6376,12 @@ void SelectionDAGBuilder::visitConstrain
case Intrinsic::experimental_constrained_nearbyint:
Opcode = ISD::STRICT_FNEARBYINT;
break;
+ case Intrinsic::experimental_constrained_maxnum:
+ Opcode = ISD::STRICT_FMAXNUM;
+ break;
+ case Intrinsic::experimental_constrained_minnum:
+ Opcode = ISD::STRICT_FMINNUM;
+ break;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain = getRoot();
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp Tue Oct 30 14:01:29 2018
@@ -175,7 +175,9 @@ std::string SDNode::getOperationName(con
// Unary operators
case ISD::FABS: return "fabs";
case ISD::FMINNUM: return "fminnum";
+ case ISD::STRICT_FMINNUM: return "strict_fminnum";
case ISD::FMAXNUM: return "fmaxnum";
+ case ISD::STRICT_FMAXNUM: return "strict_fmaxnum";
case ISD::FMINNUM_IEEE: return "fminnum_ieee";
case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee";
case ISD::FMINIMUM: return "fminimum";
Modified: llvm/trunk/lib/IR/Verifier.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/Verifier.cpp?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/lib/IR/Verifier.cpp (original)
+++ llvm/trunk/lib/IR/Verifier.cpp Tue Oct 30 14:01:29 2018
@@ -4104,6 +4104,8 @@ void Verifier::visitIntrinsicCallSite(In
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
visitConstrainedFPIntrinsic(
cast<ConstrainedFPIntrinsic>(*CS.getInstruction()));
break;
Modified: llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll?rev=345650&r1=345649&r2=345650&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll Tue Oct 30 14:01:29 2018
@@ -3668,6 +3668,515 @@ entry:
ret <4 x double> %nearby
}
+define <1 x float> @constrained_vector_maxnum_v1f32() {
+; NO-FMA-LABEL: constrained_vector_maxnum_v1f32:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: pushq %rax
+; NO-FMA-NEXT: .cfi_def_cfa_offset 16
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fmaxf
+; NO-FMA-NEXT: popq %rax
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_maxnum_v1f32:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: pushq %rax
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 16
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fmaxf
+; HAS-FMA-NEXT: popq %rax
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+ %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
+ <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <1 x float> %max
+}
+
+define <2 x double> @constrained_vector_maxnum_v2f64() {
+; NO-FMA-LABEL: constrained_vector_maxnum_v2f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 32
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; NO-FMA-NEXT: addq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_maxnum_v2f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $24, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: addq $24, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+ %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+ <2 x double> <double 43.0, double 42.0>,
+ <2 x double> <double 41.0, double 40.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %max
+}
+
+define <3 x float> @constrained_vector_maxnum_v3f32() {
+; NO-FMA-LABEL: constrained_vector_maxnum_v3f32:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 48
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fmaxf
+; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fmaxf
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fmaxf
+; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
+; NO-FMA-NEXT: movaps %xmm1, %xmm0
+; NO-FMA-NEXT: addq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_maxnum_v3f32:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fmaxf
+; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fmaxf
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fmaxf
+; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; HAS-FMA-NEXT: addq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+ %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
+ <3 x float> <float 43.0, float 44.0, float 45.0>,
+ <3 x float> <float 41.0, float 42.0, float 43.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <3 x float> %max
+}
+
+define <3 x double> @constrained_vector_max_v3f64() {
+; NO-FMA-LABEL: constrained_vector_max_v3f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 32
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
+; NO-FMA-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; NO-FMA-NEXT: # xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; NO-FMA-NEXT: # xmm1 = mem[0],zero
+; NO-FMA-NEXT: addq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_max_v3f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $56, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: vzeroupper
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; HAS-FMA-NEXT: addq $56, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+ %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
+ <3 x double> <double 43.0, double 44.0, double 45.0>,
+ <3 x double> <double 40.0, double 41.0, double 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <3 x double> %max
+}
+
+define <4 x double> @constrained_vector_maxnum_v4f64() {
+; NO-FMA-LABEL: constrained_vector_maxnum_v4f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 48
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movaps %xmm0, %xmm1
+; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
+; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; NO-FMA-NEXT: addq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_maxnum_v4f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: addq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+ %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
+ <4 x double> <double 44.0, double 45.0,
+ double 46.0, double 47.0>,
+ <4 x double> <double 40.0, double 41.0,
+ double 42.0, double 43.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x double> %max
+}
+
+define <1 x float> @constrained_vector_minnum_v1f32() {
+; NO-FMA-LABEL: constrained_vector_minnum_v1f32:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: pushq %rax
+; NO-FMA-NEXT: .cfi_def_cfa_offset 16
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fminf
+; NO-FMA-NEXT: popq %rax
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_minnum_v1f32:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: pushq %rax
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 16
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fminf
+; HAS-FMA-NEXT: popq %rax
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+ %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
+ <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <1 x float> %min
+}
+
+define <2 x double> @constrained_vector_minnum_v2f64() {
+; NO-FMA-LABEL: constrained_vector_minnum_v2f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 32
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; NO-FMA-NEXT: addq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_minnum_v2f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $24, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: addq $24, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+ %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+ <2 x double> <double 43.0, double 42.0>,
+ <2 x double> <double 41.0, double 40.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %min
+}
+
+define <3 x float> @constrained_vector_minnum_v3f32() {
+; NO-FMA-LABEL: constrained_vector_minnum_v3f32:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 48
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fminf
+; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fminf
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fminf
+; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
+; NO-FMA-NEXT: movaps %xmm1, %xmm0
+; NO-FMA-NEXT: addq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_minnum_v3f32:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fminf
+; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fminf
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fminf
+; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; HAS-FMA-NEXT: addq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+ %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
+ <3 x float> <float 43.0, float 44.0, float 45.0>,
+ <3 x float> <float 41.0, float 42.0, float 43.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <3 x float> %min
+}
+
+define <3 x double> @constrained_vector_min_v3f64() {entry:
+; NO-FMA-LABEL: constrained_vector_min_v3f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 32
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
+; NO-FMA-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; NO-FMA-NEXT: # xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; NO-FMA-NEXT: # xmm1 = mem[0],zero
+; NO-FMA-NEXT: addq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_min_v3f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $56, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: vzeroupper
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; HAS-FMA-NEXT: addq $56, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+ %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
+ <3 x double> <double 43.0, double 44.0, double 45.0>,
+ <3 x double> <double 40.0, double 41.0, double 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <3 x double> %min
+}
+
+define <4 x double> @constrained_vector_minnum_v4f64() {
+; NO-FMA-LABEL: constrained_vector_minnum_v4f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 48
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movaps %xmm0, %xmm1
+; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
+; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; NO-FMA-NEXT: addq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_minnum_v4f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: addq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+ %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
+ <4 x double> <double 44.0, double 45.0,
+ double 46.0, double 47.0>,
+ <4 x double> <double 40.0, double 41.0,
+ double 42.0, double 43.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x double> %min
+}
+
; Single width declarations
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
@@ -3688,6 +4197,8 @@ declare <2 x double> @llvm.experimental.
declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
; Scalar width declarations
declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
@@ -3708,6 +4219,8 @@ declare <1 x float> @llvm.experimental.c
declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
; Illegal width declarations
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
@@ -3746,6 +4259,10 @@ declare <3 x float> @llvm.experimental.c
declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
; Double width declarations
declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
@@ -3767,3 +4284,5 @@ declare <4 x double> @llvm.experimental.
declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
More information about the llvm-commits
mailing list