[llvm] r227303 - AVX-512: Added FMA intrinsics with rounding mode

Adam Nemet anemet at apple.com
Thu Jan 29 22:45:42 PST 2015


Hi Elena,

Great, thanks!  I just have one follow-on request below.

> On Jan 28, 2015, at 2:21 AM, Elena Demikhovsky <elena.demikhovsky at intel.com> wrote:
> 
> Author: delena
> Date: Wed Jan 28 04:21:27 2015
> New Revision: 227303
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=227303&view=rev
> Log:
> AVX-512: Added FMA intrinsics with rounding mode
> By Asaf Badouh and Elena Demikhovsky
> 
> Added special nodes for rounding: FMADD_RND, FMSUB_RND..
> It will prevent merge between nodes with rounding and other standard nodes.
> 
> 
> Modified:
>   llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>   llvm/trunk/lib/Target/X86/X86ISelLowering.h
>   llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>   llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
>   llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>   llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 28 04:21:27 2015
> @@ -17039,54 +17039,6 @@ static SDValue getScalarMaskingNode(SDVa
>    return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
> }
> 
> -static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
> -    switch (IntNo) {
> -    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
> -    case Intrinsic::x86_fma_vfmadd_ps:
> -    case Intrinsic::x86_fma_vfmadd_pd:
> -    case Intrinsic::x86_fma_vfmadd_ps_256:
> -    case Intrinsic::x86_fma_vfmadd_pd_256:
> -    case Intrinsic::x86_fma_mask_vfmadd_ps_512:
> -    case Intrinsic::x86_fma_mask_vfmadd_pd_512:
> -      return X86ISD::FMADD;
> -    case Intrinsic::x86_fma_vfmsub_ps:
> -    case Intrinsic::x86_fma_vfmsub_pd:
> -    case Intrinsic::x86_fma_vfmsub_ps_256:
> -    case Intrinsic::x86_fma_vfmsub_pd_256:
> -    case Intrinsic::x86_fma_mask_vfmsub_ps_512:
> -    case Intrinsic::x86_fma_mask_vfmsub_pd_512:
> -      return X86ISD::FMSUB;
> -    case Intrinsic::x86_fma_vfnmadd_ps:
> -    case Intrinsic::x86_fma_vfnmadd_pd:
> -    case Intrinsic::x86_fma_vfnmadd_ps_256:
> -    case Intrinsic::x86_fma_vfnmadd_pd_256:
> -    case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
> -    case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
> -      return X86ISD::FNMADD;
> -    case Intrinsic::x86_fma_vfnmsub_ps:
> -    case Intrinsic::x86_fma_vfnmsub_pd:
> -    case Intrinsic::x86_fma_vfnmsub_ps_256:
> -    case Intrinsic::x86_fma_vfnmsub_pd_256:
> -    case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
> -    case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
> -      return X86ISD::FNMSUB;
> -    case Intrinsic::x86_fma_vfmaddsub_ps:
> -    case Intrinsic::x86_fma_vfmaddsub_pd:
> -    case Intrinsic::x86_fma_vfmaddsub_ps_256:
> -    case Intrinsic::x86_fma_vfmaddsub_pd_256:
> -    case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
> -    case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
> -      return X86ISD::FMADDSUB;
> -    case Intrinsic::x86_fma_vfmsubadd_ps:
> -    case Intrinsic::x86_fma_vfmsubadd_pd:
> -    case Intrinsic::x86_fma_vfmsubadd_ps_256:
> -    case Intrinsic::x86_fma_vfmsubadd_pd_256:
> -    case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
> -    case Intrinsic::x86_fma_mask_vfmsubadd_pd_512:
> -      return X86ISD::FMSUBADD;
> -    }
> -}
> -
> static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
>                                       SelectionDAG &DAG) {
>  SDLoc dl(Op);
> @@ -17123,9 +17075,43 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>                                  Mask, Src0, Subtarget, DAG);
>    }
>    case INTR_TYPE_2OP_MASK: {
> -      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
> +      SDValue Mask = Op.getOperand(4);
> +      SDValue PassThru = Op.getOperand(3);
> +      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;

I think it would be good to document the meaning of Opc1 in the enum where INTR_TYPE_2OP_MASK is defined.  If I remember correctly, its meaning differs across the different cases.

Thanks,
Adam

> +      if (IntrWithRoundingModeOpcode != 0) {
> +        unsigned Round = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
> +        if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
> +          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
> +                                      dl, Op.getValueType(),
> +                                      Op.getOperand(1), Op.getOperand(2),
> +                                      Op.getOperand(3), Op.getOperand(5)),
> +                                      Mask, PassThru, Subtarget, DAG);
> +        }
> +      }
> +      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
> +                                              Op.getOperand(1),
>                                              Op.getOperand(2)),
> -                                  Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
> +                                  Mask, PassThru, Subtarget, DAG);
> +    }
> +    case FMA_OP_MASK: {
> +      SDValue Src1 = Op.getOperand(1);
> +      SDValue Src2 = Op.getOperand(2);
> +      SDValue Src3 = Op.getOperand(3);
> +      SDValue Mask = Op.getOperand(4);
> +      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
> +      if (IntrWithRoundingModeOpcode != 0) {
> +        SDValue Rnd = Op.getOperand(5);
> +        if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
> +            X86::STATIC_ROUNDING::CUR_DIRECTION)
> +          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
> +                                                  dl, Op.getValueType(),
> +                                                  Src1, Src2, Src3, Rnd),
> +                                      Mask, Src1, Subtarget, DAG);
> +      }
> +      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
> +                                              dl, Op.getValueType(),
> +                                              Src1, Src2, Src3),
> +                                  Mask, Src1, Subtarget, DAG);
>    }
>    case CMP_MASK:
>    case CMP_MASK_CC: {
> @@ -17215,16 +17201,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>      return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
>                         Op.getOperand(2));
>    }
> -    case FMA_OP_MASK:
> -    {
> -        return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
> -            dl, Op.getValueType(),
> -            Op.getOperand(1),
> -            Op.getOperand(2),
> -            Op.getOperand(3)),
> -            Op.getOperand(4), Op.getOperand(1),
> -            Subtarget, DAG);
> -    }
>    default:
>      break;
>    }
> @@ -17395,58 +17371,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
>    return DAG.getNode(Opcode, dl, VTs, NewOps);
>  }
> -
> -  case Intrinsic::x86_fma_mask_vfmadd_ps_512:
> -  case Intrinsic::x86_fma_mask_vfmadd_pd_512:
> -  case Intrinsic::x86_fma_mask_vfmsub_ps_512:
> -  case Intrinsic::x86_fma_mask_vfmsub_pd_512:
> -  case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
> -  case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
> -  case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
> -  case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
> -  case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
> -  case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
> -  case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
> -  case Intrinsic::x86_fma_mask_vfmsubadd_pd_512: {
> -    auto *SAE = cast<ConstantSDNode>(Op.getOperand(5));
> -    if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION)
> -      return getVectorMaskingNode(DAG.getNode(getOpcodeForFMAIntrinsic(IntNo),
> -                                              dl, Op.getValueType(),
> -                                              Op.getOperand(1),
> -                                              Op.getOperand(2),
> -                                              Op.getOperand(3)),
> -                                  Op.getOperand(4), Op.getOperand(1),
> -                                  Subtarget, DAG);
> -    else
> -      return SDValue();
> -  }
> -
> -  case Intrinsic::x86_fma_vfmadd_ps:
> -  case Intrinsic::x86_fma_vfmadd_pd:
> -  case Intrinsic::x86_fma_vfmsub_ps:
> -  case Intrinsic::x86_fma_vfmsub_pd:
> -  case Intrinsic::x86_fma_vfnmadd_ps:
> -  case Intrinsic::x86_fma_vfnmadd_pd:
> -  case Intrinsic::x86_fma_vfnmsub_ps:
> -  case Intrinsic::x86_fma_vfnmsub_pd:
> -  case Intrinsic::x86_fma_vfmaddsub_ps:
> -  case Intrinsic::x86_fma_vfmaddsub_pd:
> -  case Intrinsic::x86_fma_vfmsubadd_ps:
> -  case Intrinsic::x86_fma_vfmsubadd_pd:
> -  case Intrinsic::x86_fma_vfmadd_ps_256:
> -  case Intrinsic::x86_fma_vfmadd_pd_256:
> -  case Intrinsic::x86_fma_vfmsub_ps_256:
> -  case Intrinsic::x86_fma_vfmsub_pd_256:
> -  case Intrinsic::x86_fma_vfnmadd_ps_256:
> -  case Intrinsic::x86_fma_vfnmadd_pd_256:
> -  case Intrinsic::x86_fma_vfnmsub_ps_256:
> -  case Intrinsic::x86_fma_vfnmsub_pd_256:
> -  case Intrinsic::x86_fma_vfmaddsub_ps_256:
> -  case Intrinsic::x86_fma_vfmaddsub_pd_256:
> -  case Intrinsic::x86_fma_vfmsubadd_ps_256:
> -  case Intrinsic::x86_fma_vfmsubadd_pd_256:
> -    return DAG.getNode(getOpcodeForFMAIntrinsic(IntNo), dl, Op.getValueType(),
> -                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
>  }
> }
> 
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Jan 28 04:21:27 2015
> @@ -378,6 +378,13 @@ namespace llvm {
>      FNMSUB,
>      FMADDSUB,
>      FMSUBADD,
> +      // FMA with rounding mode
> +      FMADD_RND,
> +      FNMADD_RND,
> +      FMSUB_RND,
> +      FNMSUB_RND,
> +      FMADDSUB_RND,
> +      FMSUBADD_RND,     
> 
>      // Compress and expand
>      COMPRESS,
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Jan 28 04:21:27 2015
> @@ -3582,6 +3582,24 @@ multiclass avx512_fma3p_rm<bits<8> opc,
> }
> } // Constraints = "$src1 = $dst"
> 
> +let Constraints = "$src1 = $dst" in {
> +// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
> +multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
> +                           SDPatternOperator OpNode> {
> +   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
> +          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
> +          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
> +          (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
> +          AVX512FMA3Base, EVEX_B, EVEX_RC;
> + }
> +} // Constraints = "$src1 = $dst"
> +
> +multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
> +                              X86VectorVTInfo VTI, SDPatternOperator OpNode> {
> +  defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
> +                              VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
> +}
> +
> multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
>                              string OpcodeStr, X86VectorVTInfo VTI,
>                              SDPatternOperator OpNode> {
> @@ -3594,10 +3612,13 @@ multiclass avx512_fma3p_forms<bits<8> op
> 
> multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
>                              string OpcodeStr,
> -                              SDPatternOperator OpNode> {
> +                              SDPatternOperator OpNode,
> +                              SDPatternOperator OpNodeRnd> {
> let ExeDomain = SSEPackedSingle in {
>    defm NAME##PSZ      : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> -                                             v16f32_info, OpNode>, EVEX_V512;
> +                                             v16f32_info, OpNode>,
> +                          avx512_fma3_round_forms<opc213, OpcodeStr,
> +                                             v16f32_info, OpNodeRnd>, EVEX_V512;
>    defm NAME##PSZ256   : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>                                             v8f32x_info, OpNode>, EVEX_V256;
>    defm NAME##PSZ128   : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> @@ -3605,7 +3626,9 @@ let ExeDomain = SSEPackedSingle in {
>  }
> let ExeDomain = SSEPackedDouble in {
>    defm  NAME##PDZ     : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> -                                             v8f64_info, OpNode>, EVEX_V512, VEX_W;
> +                                             v8f64_info, OpNode>,
> +                          avx512_fma3_round_forms<opc213, OpcodeStr,
> +                                             v8f64_info, OpNodeRnd>, EVEX_V512, VEX_W;
>    defm  NAME##PDZ256  : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>                                             v4f64x_info, OpNode>, EVEX_V256, VEX_W;
>    defm  NAME##PDZ128  : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
> @@ -3613,12 +3636,12 @@ let ExeDomain = SSEPackedDouble in {
>  }
> }
> 
> -defm VFMADD    : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>;
> -defm VFMSUB    : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>;
> -defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>;
> -defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>;
> -defm VFNMADD   : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>;
> -defm VFNMSUB   : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>;
> +defm VFMADD    : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
> +defm VFMSUB    : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
> +defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>;
> +defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
> +defm VFNMADD   : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
> +defm VFNMSUB   : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
> 
> let Constraints = "$src1 = $dst" in {
> multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Jan 28 04:21:27 2015
> @@ -203,6 +203,8 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTC
> 
> def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
>                           SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
> +def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
> +                           SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisInt<4>]>;
> def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
>                           SDTCisVec<0>, SDTCisInt<2>]>;
> def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
> @@ -265,6 +267,13 @@ def X86Fnmsub    : SDNode<"X86ISD::FNMSU
> def X86Fmaddsub  : SDNode<"X86ISD::FMADDSUB",  SDTFma>;
> def X86Fmsubadd  : SDNode<"X86ISD::FMSUBADD",  SDTFma>;
> 
> +def X86FmaddRnd     : SDNode<"X86ISD::FMADD_RND",     SDTFmaRound>;
> +def X86FnmaddRnd    : SDNode<"X86ISD::FNMADD_RND",    SDTFmaRound>;
> +def X86FmsubRnd     : SDNode<"X86ISD::FMSUB_RND",     SDTFmaRound>;
> +def X86FnmsubRnd    : SDNode<"X86ISD::FNMSUB_RND",    SDTFmaRound>;
> +def X86FmaddsubRnd  : SDNode<"X86ISD::FMADDSUB_RND",  SDTFmaRound>;
> +def X86FmsubaddRnd  : SDNode<"X86ISD::FMSUBADD_RND",  SDTFmaRound>;
> +
> def X86rsqrt28   : SDNode<"X86ISD::RSQRT28",  STDFp1SrcRm>;
> def X86rcp28     : SDNode<"X86ISD::RCP28",    STDFp1SrcRm>;
> def X86exp2      : SDNode<"X86ISD::EXP2",     STDFp1SrcRm>;
> 
> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Jan 28 04:21:27 2015
> @@ -398,30 +398,78 @@ static const IntrinsicData  IntrinsicsWi
>  X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
>  X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
>  X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128,    FMA_OP_MASK, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256,    FMA_OP_MASK, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512,    FMA_OP_MASK, X86ISD::FMADD,
> +                     X86ISD::FMADD_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128,    FMA_OP_MASK, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256,    FMA_OP_MASK, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512,    FMA_OP_MASK, X86ISD::FMADD,
> +                     X86ISD::FMADD_RND),
>  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
>  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
> +                     X86ISD::FMADDSUB_RND),
>  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
>  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
> +                     X86ISD::FMADDSUB_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128,    FMA_OP_MASK, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256,    FMA_OP_MASK, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512,    FMA_OP_MASK, X86ISD::FMSUB,
> +                     X86ISD::FMSUB_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128,    FMA_OP_MASK, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256,    FMA_OP_MASK, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512,    FMA_OP_MASK, X86ISD::FMSUB,
> +                     X86ISD::FMSUB_RND),
>  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
>  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
> +                     X86ISD::FMSUBADD_RND),
>  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
>  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB , 0),
> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
> +                     X86ISD::FMSUBADD_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128,   FMA_OP_MASK, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256,   FMA_OP_MASK, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512,   FMA_OP_MASK, X86ISD::FNMADD,
> +                     X86ISD::FNMADD_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128,   FMA_OP_MASK, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256,   FMA_OP_MASK, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512,   FMA_OP_MASK, X86ISD::FNMADD,
> +                     X86ISD::FNMADD_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512,   FMA_OP_MASK, X86ISD::FNMSUB,
> +                     X86ISD::FNMSUB_RND),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512,   FMA_OP_MASK, X86ISD::FNMSUB,
> +                     X86ISD::FNMSUB_RND),
> +  X86_INTRINSIC_DATA(fma_vfmadd_pd,        INTR_TYPE_3OP, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmadd_pd_256,    INTR_TYPE_3OP, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmadd_ps,        INTR_TYPE_3OP, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmadd_ps_256,    INTR_TYPE_3OP, X86ISD::FMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmaddsub_pd,     INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmaddsub_ps,     INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsub_pd,        INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsub_pd_256,    INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsub_ps,        INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsub_ps_256,    INTR_TYPE_3OP, X86ISD::FMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsubadd_pd,     INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsubadd_ps,     INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfmsubadd_ps_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmadd_pd,       INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmadd_pd_256,   INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmadd_ps,       INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmadd_ps_256,   INTR_TYPE_3OP, X86ISD::FNMADD, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmsub_pd,       INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmsub_pd_256,   INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmsub_ps,       INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
> +  X86_INTRINSIC_DATA(fma_vfnmsub_ps_256,   INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
>  X86_INTRINSIC_DATA(sse2_comieq_sd,    COMI, X86ISD::COMI, ISD::SETEQ),
>  X86_INTRINSIC_DATA(sse2_comige_sd,    COMI, X86ISD::COMI, ISD::SETGE),
>  X86_INTRINSIC_DATA(sse2_comigt_sd,    COMI, X86ISD::COMI, ISD::SETGT),
> 
> Modified: llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll?rev=227303&r1=227302&r2=227303&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll Wed Jan 28 04:21:27 2015
> @@ -182,3 +182,283 @@ define <8 x double> @test_mask_vfmsubadd
>  ret <8 x double> %res
> }
> 
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
> +  ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
> +  ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
> +  ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
> +  ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
> +  ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
> +  ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
> +  ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
> +  ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
> +  ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
> +  ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rne
> +  ; CHECK: vfmsub213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtn
> +  ; CHECK: vfmsub213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtp
> +  ; CHECK: vfmsub213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtz
> +  ; CHECK: vfmsub213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_current
> +  ; CHECK: vfmsub213ps  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rne
> +  ; CHECK: vfmsub213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtn
> +  ; CHECK: vfmsub213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtp
> +  ; CHECK: vfmsub213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtz
> +  ; CHECK: vfmsub213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_current
> +  ; CHECK: vfmsub213ps  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xaa,0xc2]
> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
> +  ret <16 x float> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
> +  ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
> +  ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
> +  ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
> +  ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
> +  ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
> +  ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
> +  ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
> +  ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
> +  ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
> +  ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
> +  ret <8 x double> %res
> +}
> +
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
> +  ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
> +  ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
> +  ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
> +  ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
> +  ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
> +  ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
> +  ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
> +  ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
> +  ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
> +  ret <8 x double> %res
> +}
> +
> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
> +  ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
> +  ret <8 x double> %res
> +}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits





More information about the llvm-commits mailing list