[llvm] r227303 - AVX-512: Added FMA intrinsics with rounding mode

Demikhovsky, Elena elena.demikhovsky at intel.com
Wed Feb 18 00:09:12 PST 2015


> I think that in the enum where INTR_TYPE_2OP_MASK is defined it would be good to document the meaning of Opc1.  If I remember correctly that differs across the different cases.

Done in revision 229645.
Thanks.

-  Elena


-----Original Message-----
From: Adam Nemet [mailto:anemet at apple.com] 
Sent: Friday, January 30, 2015 08:46
To: Demikhovsky, Elena
Cc: llvm-commits at cs.uiuc.edu
Subject: Re: [llvm] r227303 - AVX-512: Added FMA intrinsics with rounding mode

Hi Elena,

Great, thanks!  I just have one follow-on request below.

> On Jan 28, 2015, at 2:21 AM, Elena Demikhovsky <elena.demikhovsky at intel.com> wrote:
> 
> Author: delena
> Date: Wed Jan 28 04:21:27 2015
> New Revision: 227303
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=227303&view=rev
> Log:
> AVX-512: Added FMA intrinsics with rounding mode
> By Asaf Badouh and Elena Demikhovsky
> 
> Added special nodes for rounding: FMADD_RND, FMSUB_RND, etc.
> This prevents nodes with a rounding mode from being merged with the standard nodes.
> 
> 
> Modified:
>   llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>   llvm/trunk/lib/Target/X86/X86ISelLowering.h
>   llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>   llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
>   llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>   llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: 
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelL
> owering.cpp?rev=227303&r1=227302&r2=227303&view=diff
> ======================================================================
> ========
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 28 04:21:27 
> +++ 2015
> @@ -17039,54 +17039,6 @@ static SDValue getScalarMaskingNode(SDVa
>    return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, 
> PreservedSrc); }
> 
> -static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
> -    switch (IntNo) {
> -    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
> -    case Intrinsic::x86_fma_vfmadd_ps:
> -    case Intrinsic::x86_fma_vfmadd_pd:
> -    case Intrinsic::x86_fma_vfmadd_ps_256:
> -    case Intrinsic::x86_fma_vfmadd_pd_256:
> -    case Intrinsic::x86_fma_mask_vfmadd_ps_512:
> -    case Intrinsic::x86_fma_mask_vfmadd_pd_512:
> -      return X86ISD::FMADD;
> -    case Intrinsic::x86_fma_vfmsub_ps:
> -    case Intrinsic::x86_fma_vfmsub_pd:
> -    case Intrinsic::x86_fma_vfmsub_ps_256:
> -    case Intrinsic::x86_fma_vfmsub_pd_256:
> -    case Intrinsic::x86_fma_mask_vfmsub_ps_512:
> -    case Intrinsic::x86_fma_mask_vfmsub_pd_512:
> -      return X86ISD::FMSUB;
> -    case Intrinsic::x86_fma_vfnmadd_ps:
> -    case Intrinsic::x86_fma_vfnmadd_pd:
> -    case Intrinsic::x86_fma_vfnmadd_ps_256:
> -    case Intrinsic::x86_fma_vfnmadd_pd_256:
> -    case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
> -    case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
> -      return X86ISD::FNMADD;
> -    case Intrinsic::x86_fma_vfnmsub_ps:
> -    case Intrinsic::x86_fma_vfnmsub_pd:
> -    case Intrinsic::x86_fma_vfnmsub_ps_256:
> -    case Intrinsic::x86_fma_vfnmsub_pd_256:
> -    case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
> -    case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
> -      return X86ISD::FNMSUB;
> -    case Intrinsic::x86_fma_vfmaddsub_ps:
> -    case Intrinsic::x86_fma_vfmaddsub_pd:
> -    case Intrinsic::x86_fma_vfmaddsub_ps_256:
> -    case Intrinsic::x86_fma_vfmaddsub_pd_256:
> -    case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
> -    case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
> -      return X86ISD::FMADDSUB;
> -    case Intrinsic::x86_fma_vfmsubadd_ps:
> -    case Intrinsic::x86_fma_vfmsubadd_pd:
> -    case Intrinsic::x86_fma_vfmsubadd_ps_256:
> -    case Intrinsic::x86_fma_vfmsubadd_pd_256:
> -    case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
> -    case Intrinsic::x86_fma_mask_vfmsubadd_pd_512:
> -      return X86ISD::FMSUBADD;
> -    }
> -}
> -
> static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
>                                       SelectionDAG &DAG) {  SDLoc 
> dl(Op); @@ -17123,9 +17075,43 @@ static SDValue 
> LowerINTRINSIC_WO_CHAIN(S
>                                  Mask, Src0, Subtarget, DAG);
>    }
>    case INTR_TYPE_2OP_MASK: {
> -      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
> +      SDValue Mask = Op.getOperand(4);
> +      SDValue PassThru = Op.getOperand(3);
> +      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;

I think that in the enum where INTR_TYPE_2OP_MASK is defined it would be good to document the meaning of Opc1.  If I remember correctly that differs across the different cases.

Thanks,
Adam

> +      if (IntrWithRoundingModeOpcode != 0) {
> +        unsigned Round = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
> +        if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
> +          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
> +                                      dl, Op.getValueType(),
> +                                      Op.getOperand(1), Op.getOperand(2),
> +                                      Op.getOperand(3), Op.getOperand(5)),
> +                                      Mask, PassThru, Subtarget, DAG);
> +        }
> +      }
> +      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
> +                                              Op.getOperand(1),
>                                              Op.getOperand(2)),
> -                                  Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
> +                                  Mask, PassThru, Subtarget, DAG);
> +    }
> +    case FMA_OP_MASK: {
> +      SDValue Src1 = Op.getOperand(1);
> +      SDValue Src2 = Op.getOperand(2);
> +      SDValue Src3 = Op.getOperand(3);
> +      SDValue Mask = Op.getOperand(4);
> +      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
> +      if (IntrWithRoundingModeOpcode != 0) {
> +        SDValue Rnd = Op.getOperand(5);
> +        if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
> +            X86::STATIC_ROUNDING::CUR_DIRECTION)
> +          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
> +                                                  dl, Op.getValueType(),
> +                                                  Src1, Src2, Src3, Rnd),
> +                                      Mask, Src1, Subtarget, DAG);
> +      }
> +      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
> +                                              dl, Op.getValueType(),
> +                                              Src1, Src2, Src3),
> +                                  Mask, Src1, Subtarget, DAG);
>    }
>    case CMP_MASK:
>    case CMP_MASK_CC: {
> @@ -17215,16 +17201,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>      return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
>                         Op.getOperand(2));
>    }
> -    case FMA_OP_MASK:
> -    {
> -        return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
> -            dl, Op.getValueType(),
> -            Op.getOperand(1),
> -            Op.getOperand(2),
> -            Op.getOperand(3)),
> -            Op.getOperand(4), Op.getOperand(1),
> -            Subtarget, DAG);
> -    }
>    default:
>      break;
>    }
> @@ -17395,58 +17371,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
>    return DAG.getNode(Opcode, dl, VTs, NewOps);  }
> -
> -  case Intrinsic::x86_fma_mask_vfmadd_ps_512:
> -  case Intrinsic::x86_fma_mask_vfmadd_pd_512:
> -  case Intrinsic::x86_fma_mask_vfmsub_ps_512:
> -  case Intrinsic::x86_fma_mask_vfmsub_pd_512:
> -  case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
> -  case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
> -  case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
> -  case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
> -  case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
> -  case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
> -  case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
> -  case Intrinsic::x86_fma_mask_vfmsubadd_pd_512: {
> -    auto *SAE = cast<ConstantSDNode>(Op.getOperand(5));
> -    if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION)
> -      return getVectorMaskingNode(DAG.getNode(getOpcodeForFMAIntrinsic(IntNo),
> -                                              dl, Op.getValueType(),
> -                                              Op.getOperand(1),
> -                                              Op.getOperand(2),
> -                                              Op.getOperand(3)),
> -                                  Op.getOperand(4), Op.getOperand(1),
> -                                  Subtarget, DAG);
> -    else
> -      return SDValue();
> -  }
> -
> -  case Intrinsic::x86_fma_vfmadd_ps:
> -  case Intrinsic::x86_fma_vfmadd_pd:
> -  case Intrinsic::x86_fma_vfmsub_ps:
> -  case Intrinsic::x86_fma_vfmsub_pd:
> -  case Intrinsic::x86_fma_vfnmadd_ps:
> -  case Intrinsic::x86_fma_vfnmadd_pd:
> -  case Intrinsic::x86_fma_vfnmsub_ps:
> -  case Intrinsic::x86_fma_vfnmsub_pd:
> -  case Intrinsic::x86_fma_vfmaddsub_ps:
> -  case Intrinsic::x86_fma_vfmaddsub_pd:
> -  case Intrinsic::x86_fma_vfmsubadd_ps:
> -  case Intrinsic::x86_fma_vfmsubadd_pd:
> -  case Intrinsic::x86_fma_vfmadd_ps_256:
> -  case Intrinsic::x86_fma_vfmadd_pd_256:
> -  case Intrinsic::x86_fma_vfmsub_ps_256:
> -  case Intrinsic::x86_fma_vfmsub_pd_256:
> -  case Intrinsic::x86_fma_vfnmadd_ps_256:
> -  case Intrinsic::x86_fma_vfnmadd_pd_256:
> -  case Intrinsic::x86_fma_vfnmsub_ps_256:
> -  case Intrinsic::x86_fma_vfnmsub_pd_256:
> -  case Intrinsic::x86_fma_vfmaddsub_ps_256:
> -  case Intrinsic::x86_fma_vfmaddsub_pd_256:
> -  case Intrinsic::x86_fma_vfmsubadd_ps_256:
> -  case Intrinsic::x86_fma_vfmsubadd_pd_256:
> -    return DAG.getNode(getOpcodeForFMAIntrinsic(IntNo), dl, Op.getValueType(),
> -                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
>  }
> }
> 
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: 
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelL
> owering.h?rev=227303&r1=227302&r2=227303&view=diff
> ======================================================================
> ========
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Jan 28 04:21:27 
> +++ 2015
> @@ -378,6 +378,13 @@ namespace llvm {
>      FNMSUB,
>      FMADDSUB,
>      FMSUBADD,
> +      // FMA with rounding mode
> +      FMADD_RND,
> +      FNMADD_RND,
> +      FMSUB_RND,
> +      FNMSUB_RND,
> +      FMADDSUB_RND,
> +      FMSUBADD_RND,     
> 
>      // Compress and expand
>      COMPRESS,
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> URL: 
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr
> AVX512.td?rev=227303&r1=227302&r2=227303&view=diff
> ======================================================================
> ========
> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Jan 28 04:21:27 
> +++ 2015
> @@ -3582,6 +3582,24 @@ multiclass avx512_fma3p_rm<bits<8> opc, } } // 
> Constraints = "$src1 = $dst"
> 
> +let Constraints = "$src1 = $dst" in { // Omitting the parameter 
> +OpNode (= null_frag) disables ISel pattern matching.
> +multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
> +                           SDPatternOperator OpNode> {
> +   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
> +          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
> +          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
> +          (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
> +          AVX512FMA3Base, EVEX_B, EVEX_RC;  } } // Constraints = 
> +"$src1 = $dst"
> +
> +multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
> +                              X86VectorVTInfo VTI, SDPatternOperator 
> +OpNode> {
> +  defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
> +                              VTI, OpNode>, EVEX_CD8<VTI.EltSize, 
> +CD8VF>; }
> +
> multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
>                              string OpcodeStr, X86VectorVTInfo VTI,
>                              SDPatternOperator OpNode> { @@ -3594,10 
> +3612,13 @@ multiclass avx512_fma3p_forms<bits<8> op
> 
> multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
>                              string OpcodeStr,
> -                              SDPatternOperator OpNode> {
> +                              SDPatternOperator OpNode,
> +                              SDPatternOperator OpNodeRnd> {
> let ExeDomain = SSEPackedSingle in {
>    defm NAME##PSZ      : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> -                                             v16f32_info, OpNode>, EVEX_V512;
> +                                             v16f32_info, OpNode>,
> +                          avx512_fma3_round_forms<opc213, OpcodeStr,
> +                                             v16f32_info, OpNodeRnd>, 
> + EVEX_V512;
>    defm NAME##PSZ256   : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>                                             v8f32x_info, OpNode>, EVEX_V256;
>    defm NAME##PSZ128   : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> @@ -3605,7 +3626,9 @@ let ExeDomain = SSEPackedSingle in {  } let 
> ExeDomain = SSEPackedDouble in {
>    defm  NAME##PDZ     : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> -                                             v8f64_info, OpNode>, EVEX_V512, VEX_W;
> +                                             v8f64_info, OpNode>,
> +                          avx512_fma3_round_forms<opc213, OpcodeStr,
> +                                             v8f64_info, OpNodeRnd>, 
> + EVEX_V512, VEX_W;
>    defm  NAME##PDZ256  : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>                                             v4f64x_info, OpNode>, EVEX_V256, VEX_W;
>    defm  NAME##PDZ128  : avx512_fma3p_forms<opc213, opc231, OpcodeStr, 
> @@ -3613,12 +3636,12 @@ let ExeDomain = SSEPackedDouble in {  } }
> 
> -defm VFMADD    : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>;
> -defm VFMSUB    : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>;
> -defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>; 
> -defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>;
> -defm VFNMADD   : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>;
> -defm VFNMSUB   : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>;
> +defm VFMADD    : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
> +defm VFMSUB    : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
> +defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, 
> +X86FmaddsubRnd>; defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
> +defm VFNMADD   : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
> +defm VFNMSUB   : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
> 
> let Constraints = "$src1 = $dst" in {
> multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode 
> OpNode,
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> URL: 
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr
> FragmentsSIMD.td?rev=227303&r1=227302&r2=227303&view=diff
> ======================================================================
> ========
> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Jan 28 
> +++ 04:21:27 2015
> @@ -203,6 +203,8 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTC
> 
> def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
>                           SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
> +def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
> +                           SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, 
> +SDTCisInt<4>]>;
> def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
>                           SDTCisVec<0>, SDTCisInt<2>]>; def 
> STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
> @@ -265,6 +267,13 @@ def X86Fnmsub    : SDNode<"X86ISD::FNMSU
> def X86Fmaddsub  : SDNode<"X86ISD::FMADDSUB",  SDTFma>; def 
> X86Fmsubadd  : SDNode<"X86ISD::FMSUBADD",  SDTFma>;
> 
> +def X86FmaddRnd     : SDNode<"X86ISD::FMADD_RND",     SDTFmaRound>;
> +def X86FnmaddRnd    : SDNode<"X86ISD::FNMADD_RND",    SDTFmaRound>;
> +def X86FmsubRnd     : SDNode<"X86ISD::FMSUB_RND",     SDTFmaRound>;
> +def X86FnmsubRnd    : SDNode<"X86ISD::FNMSUB_RND",    SDTFmaRound>;
> +def X86FmaddsubRnd  : SDNode<"X86ISD::FMADDSUB_RND",  SDTFmaRound>; 
> +def X86FmsubaddRnd  : SDNode<"X86ISD::FMSUBADD_RND",  SDTFmaRound>;
> +
> def X86rsqrt28   : SDNode<"X86ISD::RSQRT28",  STDFp1SrcRm>;
> def X86rcp28     : SDNode<"X86ISD::RCP28",    STDFp1SrcRm>;
> def X86exp2      : SDNode<"X86ISD::EXP2",     STDFp1SrcRm>;
> 
> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> URL: 
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Intri
> nsicsInfo.h?rev=227303&r1=227302&r2=227303&view=diff
> ======================================================================
> ========
> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Jan 28 04:21:27 
> +++ 2015
> @@ -398,30 +398,78 @@ static const IntrinsicData  IntrinsicsWi  
> X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, 
> X86ISD::VPERM2X128, 0),  X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, 
> INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),  
> X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, 
> X86ISD::VPERM2X128, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, 
> X86ISD::FMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, 
> X86ISD::FMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, 
> X86ISD::FMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, 
> X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128,    FMA_OP_MASK, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256,    FMA_OP_MASK, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512,    FMA_OP_MASK, X86ISD::FMADD,
> +                     X86ISD::FMADD_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128,    FMA_OP_MASK, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256,    FMA_OP_MASK, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512,    FMA_OP_MASK, X86ISD::FMADD,
> +                     X86ISD::FMADD_RND),
>  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, 
> X86ISD::FMADDSUB, 0),  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, 
> FMA_OP_MASK, X86ISD::FMADDSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
> +                     X86ISD::FMADDSUB_RND),
>  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, 
> X86ISD::FMADDSUB, 0),  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, 
> FMA_OP_MASK, X86ISD::FMADDSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, 
> X86ISD::FMSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, 
> X86ISD::FMSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, 
> X86ISD::FMSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, 
> X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
> +                     X86ISD::FMADDSUB_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128,    FMA_OP_MASK, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256,    FMA_OP_MASK, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512,    FMA_OP_MASK, X86ISD::FMSUB,
> +                     X86ISD::FMSUB_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128,    FMA_OP_MASK, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256,    FMA_OP_MASK, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512,    FMA_OP_MASK, X86ISD::FMSUB,
> +                     X86ISD::FMSUB_RND),
>  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, 
> X86ISD::FMSUBADD, 0),  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, 
> FMA_OP_MASK, X86ISD::FMSUBADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
> +                     X86ISD::FMSUBADD_RND),
>  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, 
> X86ISD::FMSUBADD, 0),  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, 
> FMA_OP_MASK, X86ISD::FMSUBADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, 
> X86ISD::FNMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, 
> X86ISD::FNMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, 
> X86ISD::FNMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, 
> X86ISD::FNMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, 
> X86ISD::FNMSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, 
> X86ISD::FNMSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, 
> X86ISD::FNMSUB , 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, 
> X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
> +                     X86ISD::FMSUBADD_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128,   FMA_OP_MASK, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256,   FMA_OP_MASK, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512,   FMA_OP_MASK, X86ISD::FNMADD,
> +                     X86ISD::FNMADD_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128,   FMA_OP_MASK, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256,   FMA_OP_MASK, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512,   FMA_OP_MASK, X86ISD::FNMADD,
> +                     X86ISD::FNMADD_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512,   FMA_OP_MASK, X86ISD::FNMSUB,
> +                     X86ISD::FNMSUB_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512,   FMA_OP_MASK, X86ISD::FNMSUB,
> +                     X86ISD::FNMSUB_RND),
> +  X86_INTRINSIC_DATA(fma_vfmadd_pd,        INTR_TYPE_3OP, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmadd_pd_256,    INTR_TYPE_3OP, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmadd_ps,        INTR_TYPE_3OP, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmadd_ps_256,    INTR_TYPE_3OP, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmaddsub_pd,     INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmaddsub_ps,     INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsub_pd,        INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsub_pd_256,    INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsub_ps,        INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsub_ps_256,    INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsubadd_pd,     INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsubadd_ps,     INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsubadd_ps_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmadd_pd,       INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmadd_pd_256,   INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmadd_ps,       INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmadd_ps_256,   INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmsub_pd,       INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmsub_pd_256,   INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmsub_ps,       INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmsub_ps_256,   INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
>  X86_INTRINSIC_DATA(sse2_comieq_sd,    COMI, X86ISD::COMI, ISD::SETEQ),
>  X86_INTRINSIC_DATA(sse2_comige_sd,    COMI, X86ISD::COMI, ISD::SETGE),
>  X86_INTRINSIC_DATA(sse2_comigt_sd,    COMI, X86ISD::COMI, ISD::SETGT),
> 
> Modified: llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
> URL: 
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512
> -fma-intrinsics.ll?rev=227303&r1=227302&r2=227303&view=diff
> ======================================================================
> ========
> --- llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll Wed Jan 28 
> +++ 04:21:27 2015
> @@ -182,3 +182,283 @@ define <8 x double> @test_mask_vfmsubadd  ret <8 
> x double> %res }
> 
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
> +  ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
> +  ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
> +  ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
> +  ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
> +  ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: 
> +[0x62,0xf2,0x75,0x49,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
> +  ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0x75,0x18,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
> +  ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0x75,0x38,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
> +  ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0x75,0x58,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
> +  ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0x75,0x78,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
> +  ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0x75,0x48,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rne(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rne
> +  ; CHECK: vfmsub213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0x75,0x19,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtn(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtn
> +  ; CHECK: vfmsub213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0x75,0x39,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtp(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtp
> +  ; CHECK: vfmsub213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0x75,0x59,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtz(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtz
> +  ; CHECK: vfmsub213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0x75,0x79,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_current(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_current
> +  ; CHECK: vfmsub213ps  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: 
> +[0x62,0xf2,0x75,0x49,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rne(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rne
> +  ; CHECK: vfmsub213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0x75,0x18,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtn(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtn
> +  ; CHECK: vfmsub213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0x75,0x38,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtp(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtp
> +  ; CHECK: vfmsub213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0x75,0x58,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtz(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtz
> +  ; CHECK: vfmsub213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0x75,0x78,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_current(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_current
> +  ; CHECK: vfmsub213ps  %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0x75,0x48,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) 
> +nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
> +  ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
> +  ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
> +  ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
> +  ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
> +  ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: 
> +[0x62,0xf2,0xf5,0x49,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
> +  ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0xf5,0x18,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
> +  ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0xf5,0x38,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
> +  ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0xf5,0x58,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
> +  ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0xf5,0x78,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
> +  ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0xf5,0x48,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
> +  ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
> +  ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
> +  ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
> +  ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
> +encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
> +  ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: 
> +[0x62,0xf2,0xf5,0x49,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
> +  ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0xf5,0x18,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
> +  ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0xf5,0x38,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
> +  ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0xf5,0x58,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
> +  ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0xf5,0x78,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) 
> +nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
> +  ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 ## encoding: 
> +[0x62,0xf2,0xf5,0x48,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) 
> +nounwind
> +  ret <8 x double> %res
> +}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

---------------------------------------------------------------------
Intel Israel (74) Limited

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.





More information about the llvm-commits mailing list