[llvm] r227303 - AVX-512: Added FMA intrinsics with rounding mode
Adam Nemet
anemet at apple.com
Thu Jan 29 22:45:42 PST 2015
Hi Elena,
Great, thanks! I just have one follow-on request below.
> On Jan 28, 2015, at 2:21 AM, Elena Demikhovsky <elena.demikhovsky at intel.com> wrote:
>
> Author: delena
> Date: Wed Jan 28 04:21:27 2015
> New Revision: 227303
>
> URL: http://llvm.org/viewvc/llvm-project?rev=227303&view=rev
> Log:
> AVX-512: Added FMA intrinsics with rounding mode
> By Asaf Badouh and Elena Demikhovsky
>
> Added special nodes for rounding: FMADD_RND, FMSUB_RND..
> It will prevent merge between nodes with rounding and other standard nodes.
>
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.h
> llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 28 04:21:27 2015
> @@ -17039,54 +17039,6 @@ static SDValue getScalarMaskingNode(SDVa
> return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
> }
>
> -static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
> - switch (IntNo) {
> - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
> - case Intrinsic::x86_fma_vfmadd_ps:
> - case Intrinsic::x86_fma_vfmadd_pd:
> - case Intrinsic::x86_fma_vfmadd_ps_256:
> - case Intrinsic::x86_fma_vfmadd_pd_256:
> - case Intrinsic::x86_fma_mask_vfmadd_ps_512:
> - case Intrinsic::x86_fma_mask_vfmadd_pd_512:
> - return X86ISD::FMADD;
> - case Intrinsic::x86_fma_vfmsub_ps:
> - case Intrinsic::x86_fma_vfmsub_pd:
> - case Intrinsic::x86_fma_vfmsub_ps_256:
> - case Intrinsic::x86_fma_vfmsub_pd_256:
> - case Intrinsic::x86_fma_mask_vfmsub_ps_512:
> - case Intrinsic::x86_fma_mask_vfmsub_pd_512:
> - return X86ISD::FMSUB;
> - case Intrinsic::x86_fma_vfnmadd_ps:
> - case Intrinsic::x86_fma_vfnmadd_pd:
> - case Intrinsic::x86_fma_vfnmadd_ps_256:
> - case Intrinsic::x86_fma_vfnmadd_pd_256:
> - case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
> - case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
> - return X86ISD::FNMADD;
> - case Intrinsic::x86_fma_vfnmsub_ps:
> - case Intrinsic::x86_fma_vfnmsub_pd:
> - case Intrinsic::x86_fma_vfnmsub_ps_256:
> - case Intrinsic::x86_fma_vfnmsub_pd_256:
> - case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
> - case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
> - return X86ISD::FNMSUB;
> - case Intrinsic::x86_fma_vfmaddsub_ps:
> - case Intrinsic::x86_fma_vfmaddsub_pd:
> - case Intrinsic::x86_fma_vfmaddsub_ps_256:
> - case Intrinsic::x86_fma_vfmaddsub_pd_256:
> - case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
> - case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
> - return X86ISD::FMADDSUB;
> - case Intrinsic::x86_fma_vfmsubadd_ps:
> - case Intrinsic::x86_fma_vfmsubadd_pd:
> - case Intrinsic::x86_fma_vfmsubadd_ps_256:
> - case Intrinsic::x86_fma_vfmsubadd_pd_256:
> - case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
> - case Intrinsic::x86_fma_mask_vfmsubadd_pd_512:
> - return X86ISD::FMSUBADD;
> - }
> -}
> -
> static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
> SelectionDAG &DAG) {
> SDLoc dl(Op);
> @@ -17123,9 +17075,43 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
> Mask, Src0, Subtarget, DAG);
> }
> case INTR_TYPE_2OP_MASK: {
> - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
> + SDValue Mask = Op.getOperand(4);
> + SDValue PassThru = Op.getOperand(3);
> + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
I think it would be good to document the meaning of Opc1 in the enum where INTR_TYPE_2OP_MASK is defined. If I remember correctly, its meaning differs across the different cases.
Thanks,
Adam
> + if (IntrWithRoundingModeOpcode != 0) {
> + unsigned Round = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
> + if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
> + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
> + dl, Op.getValueType(),
> + Op.getOperand(1), Op.getOperand(2),
> + Op.getOperand(3), Op.getOperand(5)),
> + Mask, PassThru, Subtarget, DAG);
> + }
> + }
> + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
> + Op.getOperand(1),
> Op.getOperand(2)),
> - Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
> + Mask, PassThru, Subtarget, DAG);
> + }
> + case FMA_OP_MASK: {
> + SDValue Src1 = Op.getOperand(1);
> + SDValue Src2 = Op.getOperand(2);
> + SDValue Src3 = Op.getOperand(3);
> + SDValue Mask = Op.getOperand(4);
> + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
> + if (IntrWithRoundingModeOpcode != 0) {
> + SDValue Rnd = Op.getOperand(5);
> + if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
> + X86::STATIC_ROUNDING::CUR_DIRECTION)
> + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
> + dl, Op.getValueType(),
> + Src1, Src2, Src3, Rnd),
> + Mask, Src1, Subtarget, DAG);
> + }
> + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
> + dl, Op.getValueType(),
> + Src1, Src2, Src3),
> + Mask, Src1, Subtarget, DAG);
> }
> case CMP_MASK:
> case CMP_MASK_CC: {
> @@ -17215,16 +17201,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
> return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
> Op.getOperand(2));
> }
> - case FMA_OP_MASK:
> - {
> - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
> - dl, Op.getValueType(),
> - Op.getOperand(1),
> - Op.getOperand(2),
> - Op.getOperand(3)),
> - Op.getOperand(4), Op.getOperand(1),
> - Subtarget, DAG);
> - }
> default:
> break;
> }
> @@ -17395,58 +17371,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
> SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
> return DAG.getNode(Opcode, dl, VTs, NewOps);
> }
> -
> - case Intrinsic::x86_fma_mask_vfmadd_ps_512:
> - case Intrinsic::x86_fma_mask_vfmadd_pd_512:
> - case Intrinsic::x86_fma_mask_vfmsub_ps_512:
> - case Intrinsic::x86_fma_mask_vfmsub_pd_512:
> - case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
> - case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
> - case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
> - case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
> - case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
> - case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
> - case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
> - case Intrinsic::x86_fma_mask_vfmsubadd_pd_512: {
> - auto *SAE = cast<ConstantSDNode>(Op.getOperand(5));
> - if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION)
> - return getVectorMaskingNode(DAG.getNode(getOpcodeForFMAIntrinsic(IntNo),
> - dl, Op.getValueType(),
> - Op.getOperand(1),
> - Op.getOperand(2),
> - Op.getOperand(3)),
> - Op.getOperand(4), Op.getOperand(1),
> - Subtarget, DAG);
> - else
> - return SDValue();
> - }
> -
> - case Intrinsic::x86_fma_vfmadd_ps:
> - case Intrinsic::x86_fma_vfmadd_pd:
> - case Intrinsic::x86_fma_vfmsub_ps:
> - case Intrinsic::x86_fma_vfmsub_pd:
> - case Intrinsic::x86_fma_vfnmadd_ps:
> - case Intrinsic::x86_fma_vfnmadd_pd:
> - case Intrinsic::x86_fma_vfnmsub_ps:
> - case Intrinsic::x86_fma_vfnmsub_pd:
> - case Intrinsic::x86_fma_vfmaddsub_ps:
> - case Intrinsic::x86_fma_vfmaddsub_pd:
> - case Intrinsic::x86_fma_vfmsubadd_ps:
> - case Intrinsic::x86_fma_vfmsubadd_pd:
> - case Intrinsic::x86_fma_vfmadd_ps_256:
> - case Intrinsic::x86_fma_vfmadd_pd_256:
> - case Intrinsic::x86_fma_vfmsub_ps_256:
> - case Intrinsic::x86_fma_vfmsub_pd_256:
> - case Intrinsic::x86_fma_vfnmadd_ps_256:
> - case Intrinsic::x86_fma_vfnmadd_pd_256:
> - case Intrinsic::x86_fma_vfnmsub_ps_256:
> - case Intrinsic::x86_fma_vfnmsub_pd_256:
> - case Intrinsic::x86_fma_vfmaddsub_ps_256:
> - case Intrinsic::x86_fma_vfmaddsub_pd_256:
> - case Intrinsic::x86_fma_vfmsubadd_ps_256:
> - case Intrinsic::x86_fma_vfmsubadd_pd_256:
> - return DAG.getNode(getOpcodeForFMAIntrinsic(IntNo), dl, Op.getValueType(),
> - Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
> }
> }
>
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Jan 28 04:21:27 2015
> @@ -378,6 +378,13 @@ namespace llvm {
> FNMSUB,
> FMADDSUB,
> FMSUBADD,
> + // FMA with rounding mode
> + FMADD_RND,
> + FNMADD_RND,
> + FMSUB_RND,
> + FNMSUB_RND,
> + FMADDSUB_RND,
> + FMSUBADD_RND,
>
> // Compress and expand
> COMPRESS,
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Jan 28 04:21:27 2015
> @@ -3582,6 +3582,24 @@ multiclass avx512_fma3p_rm<bits<8> opc,
> }
> } // Constraints = "$src1 = $dst"
>
> +let Constraints = "$src1 = $dst" in {
> +// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
> +multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
> + SDPatternOperator OpNode> {
> + defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
> + (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
> + OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
> + (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
> + AVX512FMA3Base, EVEX_B, EVEX_RC;
> + }
> +} // Constraints = "$src1 = $dst"
> +
> +multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
> + X86VectorVTInfo VTI, SDPatternOperator OpNode> {
> + defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
> + VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
> +}
> +
> multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
> string OpcodeStr, X86VectorVTInfo VTI,
> SDPatternOperator OpNode> {
> @@ -3594,10 +3612,13 @@ multiclass avx512_fma3p_forms<bits<8> op
>
> multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
> string OpcodeStr,
> - SDPatternOperator OpNode> {
> + SDPatternOperator OpNode,
> + SDPatternOperator OpNodeRnd> {
> let ExeDomain = SSEPackedSingle in {
> defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> - v16f32_info, OpNode>, EVEX_V512;
> + v16f32_info, OpNode>,
> + avx512_fma3_round_forms<opc213, OpcodeStr,
> + v16f32_info, OpNodeRnd>, EVEX_V512;
> defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> v8f32x_info, OpNode>, EVEX_V256;
> defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> @@ -3605,7 +3626,9 @@ let ExeDomain = SSEPackedSingle in {
> }
> let ExeDomain = SSEPackedDouble in {
> defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> - v8f64_info, OpNode>, EVEX_V512, VEX_W;
> + v8f64_info, OpNode>,
> + avx512_fma3_round_forms<opc213, OpcodeStr,
> + v8f64_info, OpNodeRnd>, EVEX_V512, VEX_W;
> defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> v4f64x_info, OpNode>, EVEX_V256, VEX_W;
> defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> @@ -3613,12 +3636,12 @@ let ExeDomain = SSEPackedDouble in {
> }
> }
>
> -defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>;
> -defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>;
> -defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>;
> -defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>;
> -defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>;
> -defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>;
> +defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
> +defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
> +defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>;
> +defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
> +defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
> +defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
>
> let Constraints = "$src1 = $dst" in {
> multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Jan 28 04:21:27 2015
> @@ -203,6 +203,8 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTC
>
> def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
> SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
> +def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
> + SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisInt<4>]>;
> def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
> SDTCisVec<0>, SDTCisInt<2>]>;
> def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
> @@ -265,6 +267,13 @@ def X86Fnmsub : SDNode<"X86ISD::FNMSU
> def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
> def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
>
> +def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound>;
> +def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound>;
> +def X86FmsubRnd : SDNode<"X86ISD::FMSUB_RND", SDTFmaRound>;
> +def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound>;
> +def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound>;
> +def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound>;
> +
> def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
> def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
> def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
>
> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Jan 28 04:21:27 2015
> @@ -398,30 +398,78 @@ static const IntrinsicData IntrinsicsWi
> X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
> X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
> X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
> + X86ISD::FMADD_RND),
> + X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
> + X86ISD::FMADD_RND),
> X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
> X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
> + X86ISD::FMADDSUB_RND),
> X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
> X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
> + X86ISD::FMADDSUB_RND),
> + X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512, FMA_OP_MASK, X86ISD::FMSUB,
> + X86ISD::FMSUB_RND),
> + X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512, FMA_OP_MASK, X86ISD::FMSUB,
> + X86ISD::FMSUB_RND),
> X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
> X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
> + X86ISD::FMSUBADD_RND),
> X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
> X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
> - X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB , 0),
> - X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
> + X86ISD::FMSUBADD_RND),
> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
> + X86ISD::FNMADD_RND),
> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
> + X86ISD::FNMADD_RND),
> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
> + X86ISD::FNMSUB_RND),
> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
> + X86ISD::FNMSUB_RND),
> + X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0),
> + X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
> + X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0),
> + X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
> + X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfmsub_pd, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> + X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> + X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> + X86_INTRINSIC_DATA(fma_vfmsubadd_ps_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> + X86_INTRINSIC_DATA(fma_vfnmadd_pd, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> + X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> + X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> + X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> + X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
> + X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
> X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
> X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
> X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll Wed Jan 28 04:21:27 2015
> @@ -182,3 +182,283 @@ define <8 x double> @test_mask_vfmsubadd
> ret <8 x double> %res
> }
>
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
> + ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
> + ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
> + ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
> + ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
> + ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
> + ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
> + ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
> + ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
> + ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
> + ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rne
> + ; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xaa,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtn
> + ; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xaa,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtp
> + ; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xaa,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtz
> + ; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xaa,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_current
> + ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xaa,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rne
> + ; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xaa,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtn
> + ; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xaa,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtp
> + ; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xaa,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtz
> + ; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xaa,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_current
> + ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xaa,0xc2]
> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
> + ret <16 x float> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
> + ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
> + ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
> + ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
> + ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
> + ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
> + ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
> + ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
> + ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
> + ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
> + ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
> + ret <8 x double> %res
> +}
> +
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
> + ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
> + ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
> + ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
> + ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
> + ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
> + ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
> + ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
> + ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
> + ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
> + ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
> + ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
> + ret <8 x double> %res
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list