[llvm] r227303 - AVX-512: Added FMA intrinsics with rounding mode

Adam Nemet anemet at apple.com
Wed Feb 18 10:16:08 PST 2015


> On Feb 18, 2015, at 12:09 AM, Demikhovsky, Elena <elena.demikhovsky at intel.com> wrote:
> 
>> I think that in the enum where INTR_TYPE_2OP_MASK is defined it would be good to document the meaning of Opc1.  If I remember correctly that differs across the different cases.
> 
> Done in revision 229645.

Thanks!

> Thanks.
> 
> -  Elena
> 
> 
> -----Original Message-----
> From: Adam Nemet [mailto:anemet at apple.com] 
> Sent: Friday, January 30, 2015 08:46
> To: Demikhovsky, Elena
> Cc: llvm-commits at cs.uiuc.edu
> Subject: Re: [llvm] r227303 - AVX-512: Added FMA intrinsics with rounding mode
> 
> Hi Elena,
> 
> Great, thanks!  I just have one follow-on request below.
> 
>> On Jan 28, 2015, at 2:21 AM, Elena Demikhovsky <elena.demikhovsky at intel.com> wrote:
>> 
>> Author: delena
>> Date: Wed Jan 28 04:21:27 2015
>> New Revision: 227303
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=227303&view=rev
>> Log:
>> AVX-512: Added FMA intrinsics with rounding mode
>> By Asaf Badouh and Elena Demikhovsky
>> 
>> Added special nodes for rounding: FMADD_RND, FMSUB_RND, ...
>> This prevents nodes with a rounding mode from being merged with the standard nodes.
>> 
>> 
>> Modified:
>>  llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>>  llvm/trunk/lib/Target/X86/X86ISelLowering.h
>>  llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>>  llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
>>  llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>>  llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
>> 
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=227303&r1=227302&r2=227303&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 28 04:21:27 
>> +++ 2015
>> @@ -17039,54 +17039,6 @@ static SDValue getScalarMaskingNode(SDVa
>>   return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
>> }
>> 
>> -static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
>> -    switch (IntNo) {
>> -    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
>> -    case Intrinsic::x86_fma_vfmadd_ps:
>> -    case Intrinsic::x86_fma_vfmadd_pd:
>> -    case Intrinsic::x86_fma_vfmadd_ps_256:
>> -    case Intrinsic::x86_fma_vfmadd_pd_256:
>> -    case Intrinsic::x86_fma_mask_vfmadd_ps_512:
>> -    case Intrinsic::x86_fma_mask_vfmadd_pd_512:
>> -      return X86ISD::FMADD;
>> -    case Intrinsic::x86_fma_vfmsub_ps:
>> -    case Intrinsic::x86_fma_vfmsub_pd:
>> -    case Intrinsic::x86_fma_vfmsub_ps_256:
>> -    case Intrinsic::x86_fma_vfmsub_pd_256:
>> -    case Intrinsic::x86_fma_mask_vfmsub_ps_512:
>> -    case Intrinsic::x86_fma_mask_vfmsub_pd_512:
>> -      return X86ISD::FMSUB;
>> -    case Intrinsic::x86_fma_vfnmadd_ps:
>> -    case Intrinsic::x86_fma_vfnmadd_pd:
>> -    case Intrinsic::x86_fma_vfnmadd_ps_256:
>> -    case Intrinsic::x86_fma_vfnmadd_pd_256:
>> -    case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
>> -    case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
>> -      return X86ISD::FNMADD;
>> -    case Intrinsic::x86_fma_vfnmsub_ps:
>> -    case Intrinsic::x86_fma_vfnmsub_pd:
>> -    case Intrinsic::x86_fma_vfnmsub_ps_256:
>> -    case Intrinsic::x86_fma_vfnmsub_pd_256:
>> -    case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
>> -    case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
>> -      return X86ISD::FNMSUB;
>> -    case Intrinsic::x86_fma_vfmaddsub_ps:
>> -    case Intrinsic::x86_fma_vfmaddsub_pd:
>> -    case Intrinsic::x86_fma_vfmaddsub_ps_256:
>> -    case Intrinsic::x86_fma_vfmaddsub_pd_256:
>> -    case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
>> -    case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
>> -      return X86ISD::FMADDSUB;
>> -    case Intrinsic::x86_fma_vfmsubadd_ps:
>> -    case Intrinsic::x86_fma_vfmsubadd_pd:
>> -    case Intrinsic::x86_fma_vfmsubadd_ps_256:
>> -    case Intrinsic::x86_fma_vfmsubadd_pd_256:
>> -    case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
>> -    case Intrinsic::x86_fma_mask_vfmsubadd_pd_512:
>> -      return X86ISD::FMSUBADD;
>> -    }
>> -}
>> -
>> static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
>>                                      SelectionDAG &DAG) {
>>   SDLoc dl(Op);
>> @@ -17123,9 +17075,43 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>>                                 Mask, Src0, Subtarget, DAG);
>>   }
>>   case INTR_TYPE_2OP_MASK: {
>> -      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
>> +      SDValue Mask = Op.getOperand(4);
>> +      SDValue PassThru = Op.getOperand(3);
>> +      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
> 
> I think that in the enum where INTR_TYPE_2OP_MASK is defined it would be good to document the meaning of Opc1.  If I remember correctly that differs across the different cases.
> 
> Thanks,
> Adam
> 
>> +      if (IntrWithRoundingModeOpcode != 0) {
>> +        unsigned Round = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
>> +        if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
>> +          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
>> +                                      dl, Op.getValueType(),
>> +                                      Op.getOperand(1), Op.getOperand(2),
>> +                                      Op.getOperand(3), Op.getOperand(5)),
>> +                                      Mask, PassThru, Subtarget, DAG);
>> +        }
>> +      }
>> +      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
>> +                                              Op.getOperand(1),
>>                                             Op.getOperand(2)),
>> -                                  Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
>> +                                  Mask, PassThru, Subtarget, DAG);
>> +    }
>> +    case FMA_OP_MASK: {
>> +      SDValue Src1 = Op.getOperand(1);
>> +      SDValue Src2 = Op.getOperand(2);
>> +      SDValue Src3 = Op.getOperand(3);
>> +      SDValue Mask = Op.getOperand(4);
>> +      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
>> +      if (IntrWithRoundingModeOpcode != 0) {
>> +        SDValue Rnd = Op.getOperand(5);
>> +        if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
>> +            X86::STATIC_ROUNDING::CUR_DIRECTION)
>> +          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
>> +                                                  dl, Op.getValueType(),
>> +                                                  Src1, Src2, Src3, Rnd),
>> +                                      Mask, Src1, Subtarget, DAG);
>> +      }
>> +      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
>> +                                              dl, Op.getValueType(),
>> +                                              Src1, Src2, Src3),
>> +                                  Mask, Src1, Subtarget, DAG);
>>   }
>>   case CMP_MASK:
>>   case CMP_MASK_CC: {
>> @@ -17215,16 +17201,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>>     return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
>>                        Op.getOperand(2));
>>   }
>> -    case FMA_OP_MASK:
>> -    {
>> -        return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
>> -            dl, Op.getValueType(),
>> -            Op.getOperand(1),
>> -            Op.getOperand(2),
>> -            Op.getOperand(3)),
>> -            Op.getOperand(4), Op.getOperand(1),
>> -            Subtarget, DAG);
>> -    }
>>   default:
>>     break;
>>   }
>> @@ -17395,58 +17371,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>>   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
>>   return DAG.getNode(Opcode, dl, VTs, NewOps);  }
>> -
>> -  case Intrinsic::x86_fma_mask_vfmadd_ps_512:
>> -  case Intrinsic::x86_fma_mask_vfmadd_pd_512:
>> -  case Intrinsic::x86_fma_mask_vfmsub_ps_512:
>> -  case Intrinsic::x86_fma_mask_vfmsub_pd_512:
>> -  case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
>> -  case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
>> -  case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
>> -  case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
>> -  case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
>> -  case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
>> -  case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
>> -  case Intrinsic::x86_fma_mask_vfmsubadd_pd_512: {
>> -    auto *SAE = cast<ConstantSDNode>(Op.getOperand(5));
>> -    if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION)
>> -      return getVectorMaskingNode(DAG.getNode(getOpcodeForFMAIntrinsic(IntNo),
>> -                                              dl, Op.getValueType(),
>> -                                              Op.getOperand(1),
>> -                                              Op.getOperand(2),
>> -                                              Op.getOperand(3)),
>> -                                  Op.getOperand(4), Op.getOperand(1),
>> -                                  Subtarget, DAG);
>> -    else
>> -      return SDValue();
>> -  }
>> -
>> -  case Intrinsic::x86_fma_vfmadd_ps:
>> -  case Intrinsic::x86_fma_vfmadd_pd:
>> -  case Intrinsic::x86_fma_vfmsub_ps:
>> -  case Intrinsic::x86_fma_vfmsub_pd:
>> -  case Intrinsic::x86_fma_vfnmadd_ps:
>> -  case Intrinsic::x86_fma_vfnmadd_pd:
>> -  case Intrinsic::x86_fma_vfnmsub_ps:
>> -  case Intrinsic::x86_fma_vfnmsub_pd:
>> -  case Intrinsic::x86_fma_vfmaddsub_ps:
>> -  case Intrinsic::x86_fma_vfmaddsub_pd:
>> -  case Intrinsic::x86_fma_vfmsubadd_ps:
>> -  case Intrinsic::x86_fma_vfmsubadd_pd:
>> -  case Intrinsic::x86_fma_vfmadd_ps_256:
>> -  case Intrinsic::x86_fma_vfmadd_pd_256:
>> -  case Intrinsic::x86_fma_vfmsub_ps_256:
>> -  case Intrinsic::x86_fma_vfmsub_pd_256:
>> -  case Intrinsic::x86_fma_vfnmadd_ps_256:
>> -  case Intrinsic::x86_fma_vfnmadd_pd_256:
>> -  case Intrinsic::x86_fma_vfnmsub_ps_256:
>> -  case Intrinsic::x86_fma_vfnmsub_pd_256:
>> -  case Intrinsic::x86_fma_vfmaddsub_ps_256:
>> -  case Intrinsic::x86_fma_vfmaddsub_pd_256:
>> -  case Intrinsic::x86_fma_vfmsubadd_ps_256:
>> -  case Intrinsic::x86_fma_vfmsubadd_pd_256:
>> -    return DAG.getNode(getOpcodeForFMAIntrinsic(IntNo), dl, Op.getValueType(),
>> -                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
>> }
>> }
>> 
>> 
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
>> URL: 
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelL
>> owering.h?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Jan 28 04:21:27 
>> +++ 2015
>> @@ -378,6 +378,13 @@ namespace llvm {
>>     FNMSUB,
>>     FMADDSUB,
>>     FMSUBADD,
>> +      // FMA with rounding mode
>> +      FMADD_RND,
>> +      FNMADD_RND,
>> +      FMSUB_RND,
>> +      FNMSUB_RND,
>> +      FMADDSUB_RND,
>> +      FMSUBADD_RND,     
>> 
>>     // Compress and expand
>>     COMPRESS,
>> 
>> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>> URL: 
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr
>> AVX512.td?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Jan 28 04:21:27 
>> +++ 2015
>> @@ -3582,6 +3582,24 @@ multiclass avx512_fma3p_rm<bits<8> opc, } } // 
>> Constraints = "$src1 = $dst"
>> 
>> +let Constraints = "$src1 = $dst" in { // Omitting the parameter 
>> +OpNode (= null_frag) disables ISel pattern matching.
>> +multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
>> +                           SDPatternOperator OpNode> {
>> +   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
>> +          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
>> +          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
>> +          (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
>> +          AVX512FMA3Base, EVEX_B, EVEX_RC;  } } // Constraints = 
>> +"$src1 = $dst"
>> +
>> +multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
>> +                              X86VectorVTInfo VTI, SDPatternOperator 
>> +OpNode> {
>> +  defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
>> +                              VTI, OpNode>, EVEX_CD8<VTI.EltSize, 
>> +CD8VF>; }
>> +
>> multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
>>                             string OpcodeStr, X86VectorVTInfo VTI,
>>                             SDPatternOperator OpNode> { @@ -3594,10 
>> +3612,13 @@ multiclass avx512_fma3p_forms<bits<8> op
>> 
>> multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
>>                             string OpcodeStr,
>> -                              SDPatternOperator OpNode> {
>> +                              SDPatternOperator OpNode,
>> +                              SDPatternOperator OpNodeRnd> {
>> let ExeDomain = SSEPackedSingle in {
>>   defm NAME##PSZ      : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>> -                                             v16f32_info, OpNode>, EVEX_V512;
>> +                                             v16f32_info, OpNode>,
>> +                          avx512_fma3_round_forms<opc213, OpcodeStr,
>> +                                             v16f32_info, OpNodeRnd>, 
>> + EVEX_V512;
>>   defm NAME##PSZ256   : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>>                                            v8f32x_info, OpNode>, EVEX_V256;
>>   defm NAME##PSZ128   : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>> @@ -3605,7 +3626,9 @@ let ExeDomain = SSEPackedSingle in {  } let 
>> ExeDomain = SSEPackedDouble in {
>>   defm  NAME##PDZ     : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>> -                                             v8f64_info, OpNode>, EVEX_V512, VEX_W;
>> +                                             v8f64_info, OpNode>,
>> +                          avx512_fma3_round_forms<opc213, OpcodeStr,
>> +                                             v8f64_info, OpNodeRnd>, 
>> + EVEX_V512, VEX_W;
>>   defm  NAME##PDZ256  : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>>                                            v4f64x_info, OpNode>, EVEX_V256, VEX_W;
>>   defm  NAME##PDZ128  : avx512_fma3p_forms<opc213, opc231, OpcodeStr, 
>> @@ -3613,12 +3636,12 @@ let ExeDomain = SSEPackedDouble in {  } }
>> 
>> -defm VFMADD    : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>;
>> -defm VFMSUB    : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>;
>> -defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>; 
>> -defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>;
>> -defm VFNMADD   : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>;
>> -defm VFNMSUB   : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>;
>> +defm VFMADD    : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
>> +defm VFMSUB    : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
>> +defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>;
>> +defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
>> +defm VFNMADD   : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
>> +defm VFNMSUB   : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
>> 
>> let Constraints = "$src1 = $dst" in {
>> multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode 
>> OpNode,
>> 
>> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
>> URL: 
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr
>> FragmentsSIMD.td?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Jan 28 
>> +++ 04:21:27 2015
>> @@ -203,6 +203,8 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTC
>> 
>> def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
>>                          SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
>> +def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
>> +                           SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, 
>> +SDTCisInt<4>]>;
>> def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
>>                          SDTCisVec<0>, SDTCisInt<2>]>; def 
>> STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
>> @@ -265,6 +267,13 @@ def X86Fnmsub    : SDNode<"X86ISD::FNMSU
>> def X86Fmaddsub  : SDNode<"X86ISD::FMADDSUB",  SDTFma>; def 
>> X86Fmsubadd  : SDNode<"X86ISD::FMSUBADD",  SDTFma>;
>> 
>> +def X86FmaddRnd     : SDNode<"X86ISD::FMADD_RND",     SDTFmaRound>;
>> +def X86FnmaddRnd    : SDNode<"X86ISD::FNMADD_RND",    SDTFmaRound>;
>> +def X86FmsubRnd     : SDNode<"X86ISD::FMSUB_RND",     SDTFmaRound>;
>> +def X86FnmsubRnd    : SDNode<"X86ISD::FNMSUB_RND",    SDTFmaRound>;
>> +def X86FmaddsubRnd  : SDNode<"X86ISD::FMADDSUB_RND",  SDTFmaRound>; 
>> +def X86FmsubaddRnd  : SDNode<"X86ISD::FMSUBADD_RND",  SDTFmaRound>;
>> +
>> def X86rsqrt28   : SDNode<"X86ISD::RSQRT28",  STDFp1SrcRm>;
>> def X86rcp28     : SDNode<"X86ISD::RCP28",    STDFp1SrcRm>;
>> def X86exp2      : SDNode<"X86ISD::EXP2",     STDFp1SrcRm>;
>> 
>> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>> URL: 
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Intri
>> nsicsInfo.h?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Jan 28 04:21:27 
>> +++ 2015
>> @@ -398,30 +398,78 @@ static const IntrinsicData  IntrinsicsWi  
>> X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, 
>> X86ISD::VPERM2X128, 0),  X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, 
>> INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),  
>> X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, 
>> X86ISD::VPERM2X128, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, 
>> X86ISD::FMADD, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, 
>> X86ISD::FMADD, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, 
>> X86ISD::FMADD, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, 
>> X86ISD::FMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128,    FMA_OP_MASK, X86ISD::FMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256,    FMA_OP_MASK, X86ISD::FMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512,    FMA_OP_MASK, X86ISD::FMADD,
>> +                     X86ISD::FMADD_RND),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128,    FMA_OP_MASK, X86ISD::FMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256,    FMA_OP_MASK, X86ISD::FMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512,    FMA_OP_MASK, X86ISD::FMADD,
>> +                     X86ISD::FMADD_RND),
>> X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, 
>> X86ISD::FMADDSUB, 0),  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, 
>> FMA_OP_MASK, X86ISD::FMADDSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
>> +                     X86ISD::FMADDSUB_RND),
>> X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, 
>> X86ISD::FMADDSUB, 0),  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, 
>> FMA_OP_MASK, X86ISD::FMADDSUB, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, 
>> X86ISD::FMSUB, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, 
>> X86ISD::FMSUB, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, 
>> X86ISD::FMSUB, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, 
>> X86ISD::FMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
>> +                     X86ISD::FMADDSUB_RND),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128,    FMA_OP_MASK, X86ISD::FMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256,    FMA_OP_MASK, X86ISD::FMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512,    FMA_OP_MASK, X86ISD::FMSUB,
>> +                     X86ISD::FMSUB_RND),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128,    FMA_OP_MASK, X86ISD::FMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256,    FMA_OP_MASK, X86ISD::FMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512,    FMA_OP_MASK, X86ISD::FMSUB,
>> +                     X86ISD::FMSUB_RND),
>> X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, 
>> X86ISD::FMSUBADD, 0),  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, 
>> FMA_OP_MASK, X86ISD::FMSUBADD, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
>> +                     X86ISD::FMSUBADD_RND),
>> X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, 
>> X86ISD::FMSUBADD, 0),  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, 
>> FMA_OP_MASK, X86ISD::FMSUBADD, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, 
>> X86ISD::FNMADD, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, 
>> X86ISD::FNMADD, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, 
>> X86ISD::FNMADD, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, 
>> X86ISD::FNMADD, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, 
>> X86ISD::FNMSUB, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, 
>> X86ISD::FNMSUB, 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, 
>> X86ISD::FNMSUB , 0),
>> -  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, 
>> X86ISD::FNMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
>> +                     X86ISD::FMSUBADD_RND),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128,   FMA_OP_MASK, X86ISD::FNMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256,   FMA_OP_MASK, X86ISD::FNMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512,   FMA_OP_MASK, X86ISD::FNMADD,
>> +                     X86ISD::FNMADD_RND),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128,   FMA_OP_MASK, X86ISD::FNMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256,   FMA_OP_MASK, X86ISD::FNMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512,   FMA_OP_MASK, X86ISD::FNMADD,
>> +                     X86ISD::FNMADD_RND),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512,   FMA_OP_MASK, X86ISD::FNMSUB,
>> +                     X86ISD::FNMSUB_RND),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512,   FMA_OP_MASK, X86ISD::FNMSUB,
>> +                     X86ISD::FNMSUB_RND),
>> +  X86_INTRINSIC_DATA(fma_vfmadd_pd,        INTR_TYPE_3OP, X86ISD::FMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmadd_pd_256,    INTR_TYPE_3OP, X86ISD::FMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmadd_ps,        INTR_TYPE_3OP, X86ISD::FMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmadd_ps_256,    INTR_TYPE_3OP, X86ISD::FMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmaddsub_pd,     INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmaddsub_ps,     INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmsub_pd,        INTR_TYPE_3OP, X86ISD::FMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmsub_pd_256,    INTR_TYPE_3OP, X86ISD::FMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmsub_ps,        INTR_TYPE_3OP, X86ISD::FMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmsub_ps_256,    INTR_TYPE_3OP, X86ISD::FMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmsubadd_pd,     INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmsubadd_ps,     INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfmsubadd_ps_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfnmadd_pd,       INTR_TYPE_3OP, X86ISD::FNMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfnmadd_pd_256,   INTR_TYPE_3OP, X86ISD::FNMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfnmadd_ps,       INTR_TYPE_3OP, X86ISD::FNMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfnmadd_ps_256,   INTR_TYPE_3OP, X86ISD::FNMADD, 0),
>> +  X86_INTRINSIC_DATA(fma_vfnmsub_pd,       INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfnmsub_pd_256,   INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfnmsub_ps,       INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
>> +  X86_INTRINSIC_DATA(fma_vfnmsub_ps_256,   INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
>> X86_INTRINSIC_DATA(sse2_comieq_sd,    COMI, X86ISD::COMI, ISD::SETEQ),
>> X86_INTRINSIC_DATA(sse2_comige_sd,    COMI, X86ISD::COMI, ISD::SETGE),
>> X86_INTRINSIC_DATA(sse2_comigt_sd,    COMI, X86ISD::COMI, ISD::SETGT),
>> 
>> Modified: llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
>> URL: 
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512
>> -fma-intrinsics.ll?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll Wed Jan 28 
>> +++ 04:21:27 2015
>> @@ -182,3 +182,283 @@ define <8 x double> @test_mask_vfmsubadd  ret <8 
>> x double> %res }
>> 
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
>> +  ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
>> +  ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
>> +  ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
>> +  ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
>> +  ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: 
>> +[0x62,0xf2,0x75,0x49,0xa8,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
>> +  ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0x75,0x18,0xa8,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
>> +  ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0x75,0x38,0xa8,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
>> +  ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0x75,0x58,0xa8,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
>> +  ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0x75,0x78,0xa8,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
>> +  ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0x75,0x48,0xa8,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rne(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rne
>> +  ; CHECK: vfmsub213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0x75,0x19,0xaa,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtn(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtn
>> +  ; CHECK: vfmsub213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0x75,0x39,0xaa,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtp(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtp
>> +  ; CHECK: vfmsub213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0x75,0x59,0xaa,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtz(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtz
>> +  ; CHECK: vfmsub213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0x75,0x79,0xaa,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_current(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_current
>> +  ; CHECK: vfmsub213ps  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: 
>> +[0x62,0xf2,0x75,0x49,0xaa,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rne(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rne
>> +  ; CHECK: vfmsub213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0x75,0x18,0xaa,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtn(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtn
>> +  ; CHECK: vfmsub213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0x75,0x38,0xaa,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtp(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtp
>> +  ; CHECK: vfmsub213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0x75,0x58,0xaa,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtz(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtz
>> +  ; CHECK: vfmsub213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0x75,0x78,0xaa,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_current(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_current
>> +  ; CHECK: vfmsub213ps  %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0x75,0x48,0xaa,0xc2]
>> +  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x 
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) 
>> +nounwind
>> +  ret <16 x float> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
>> +  ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
>> +  ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
>> +  ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
>> +  ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
>> +  ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: 
>> +[0x62,0xf2,0xf5,0x49,0xa8,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
>> +  ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0xf5,0x18,0xa8,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
>> +  ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0xf5,0x38,0xa8,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
>> +  ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0xf5,0x58,0xa8,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
>> +  ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0xf5,0x78,0xa8,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
>> +  ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0xf5,0x48,0xa8,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
>> +  ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
>> +  ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
>> +  ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
>> +  ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## 
>> +encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
>> +  ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: 
>> +[0x62,0xf2,0xf5,0x49,0xae,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
>> +  ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0xf5,0x18,0xae,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
>> +  ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0xf5,0x38,0xae,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
>> +  ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0xf5,0x58,0xae,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
>> +  ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0xf5,0x78,0xae,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> +  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
>> +  ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 ## encoding: 
>> +[0x62,0xf2,0xf5,0x48,0xae,0xc2]
>> +  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x 
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) 
>> +nounwind
>> +  ret <8 x double> %res
>> +}
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 
> ---------------------------------------------------------------------
> Intel Israel (74) Limited
> 
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
> 





More information about the llvm-commits mailing list