[llvm] r227303 - AVX-512: Added FMA intrinsics with rounding mode
Adam Nemet
anemet at apple.com
Wed Feb 18 10:16:08 PST 2015
> On Feb 18, 2015, at 12:09 AM, Demikhovsky, Elena <elena.demikhovsky at intel.com> wrote:
>
>> I think that in the enum where INTR_TYPE_2OP_MASK is defined, it would be good to document the meaning of Opc1. If I remember correctly, that differs across the different cases.
>
> Done in revision 229645.
Thanks!
> Thanks.
>
> - Elena
>
>
> -----Original Message-----
> From: Adam Nemet [mailto:anemet at apple.com]
> Sent: Friday, January 30, 2015 08:46
> To: Demikhovsky, Elena
> Cc: llvm-commits at cs.uiuc.edu
> Subject: Re: [llvm] r227303 - AVX-512: Added FMA intrinsics with rounding mode
>
> Hi Elena,
>
> Great, thanks! I just have one follow-on request below.
>
>> On Jan 28, 2015, at 2:21 AM, Elena Demikhovsky <elena.demikhovsky at intel.com> wrote:
>>
>> Author: delena
>> Date: Wed Jan 28 04:21:27 2015
>> New Revision: 227303
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=227303&view=rev
>> Log:
>> AVX-512: Added FMA intrinsics with rounding mode By Asaf Badouh and
>> Elena Demikhovsky
>>
>> Added special nodes for rounding: FMADD_RND, FMSUB_RND, etc.
>> This prevents merging nodes that carry a rounding mode with the other standard nodes.
>>
>>
>> Modified:
>> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> llvm/trunk/lib/Target/X86/X86ISelLowering.h
>> llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>> llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
>> llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>> llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelL
>> owering.cpp?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 28 04:21:27
>> +++ 2015
>> @@ -17039,54 +17039,6 @@ static SDValue getScalarMaskingNode(SDVa
>> return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op,
>> PreservedSrc); }
>>
>> -static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
>> - switch (IntNo) {
>> - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
>> - case Intrinsic::x86_fma_vfmadd_ps:
>> - case Intrinsic::x86_fma_vfmadd_pd:
>> - case Intrinsic::x86_fma_vfmadd_ps_256:
>> - case Intrinsic::x86_fma_vfmadd_pd_256:
>> - case Intrinsic::x86_fma_mask_vfmadd_ps_512:
>> - case Intrinsic::x86_fma_mask_vfmadd_pd_512:
>> - return X86ISD::FMADD;
>> - case Intrinsic::x86_fma_vfmsub_ps:
>> - case Intrinsic::x86_fma_vfmsub_pd:
>> - case Intrinsic::x86_fma_vfmsub_ps_256:
>> - case Intrinsic::x86_fma_vfmsub_pd_256:
>> - case Intrinsic::x86_fma_mask_vfmsub_ps_512:
>> - case Intrinsic::x86_fma_mask_vfmsub_pd_512:
>> - return X86ISD::FMSUB;
>> - case Intrinsic::x86_fma_vfnmadd_ps:
>> - case Intrinsic::x86_fma_vfnmadd_pd:
>> - case Intrinsic::x86_fma_vfnmadd_ps_256:
>> - case Intrinsic::x86_fma_vfnmadd_pd_256:
>> - case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
>> - case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
>> - return X86ISD::FNMADD;
>> - case Intrinsic::x86_fma_vfnmsub_ps:
>> - case Intrinsic::x86_fma_vfnmsub_pd:
>> - case Intrinsic::x86_fma_vfnmsub_ps_256:
>> - case Intrinsic::x86_fma_vfnmsub_pd_256:
>> - case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
>> - case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
>> - return X86ISD::FNMSUB;
>> - case Intrinsic::x86_fma_vfmaddsub_ps:
>> - case Intrinsic::x86_fma_vfmaddsub_pd:
>> - case Intrinsic::x86_fma_vfmaddsub_ps_256:
>> - case Intrinsic::x86_fma_vfmaddsub_pd_256:
>> - case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
>> - case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
>> - return X86ISD::FMADDSUB;
>> - case Intrinsic::x86_fma_vfmsubadd_ps:
>> - case Intrinsic::x86_fma_vfmsubadd_pd:
>> - case Intrinsic::x86_fma_vfmsubadd_ps_256:
>> - case Intrinsic::x86_fma_vfmsubadd_pd_256:
>> - case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
>> - case Intrinsic::x86_fma_mask_vfmsubadd_pd_512:
>> - return X86ISD::FMSUBADD;
>> - }
>> -}
>> -
>> static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
>> SelectionDAG &DAG) { SDLoc
>> dl(Op); @@ -17123,9 +17075,43 @@ static SDValue
>> LowerINTRINSIC_WO_CHAIN(S
>> Mask, Src0, Subtarget, DAG);
>> }
>> case INTR_TYPE_2OP_MASK: {
>> - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
>> + SDValue Mask = Op.getOperand(4);
>> + SDValue PassThru = Op.getOperand(3);
>> + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
>
> I think that in the enum where INTR_TYPE_2OP_MASK is defined, it would be good to document the meaning of Opc1. If I remember correctly, that differs across the different cases.
>
> Thanks,
> Adam
>
>> + if (IntrWithRoundingModeOpcode != 0) {
>> + unsigned Round = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
>> + if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
>> + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
>> + dl, Op.getValueType(),
>> + Op.getOperand(1), Op.getOperand(2),
>> + Op.getOperand(3), Op.getOperand(5)),
>> + Mask, PassThru, Subtarget, DAG);
>> + }
>> + }
>> + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
>> + Op.getOperand(1),
>> Op.getOperand(2)),
>> - Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
>> + Mask, PassThru, Subtarget, DAG);
>> + }
>> + case FMA_OP_MASK: {
>> + SDValue Src1 = Op.getOperand(1);
>> + SDValue Src2 = Op.getOperand(2);
>> + SDValue Src3 = Op.getOperand(3);
>> + SDValue Mask = Op.getOperand(4);
>> + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
>> + if (IntrWithRoundingModeOpcode != 0) {
>> + SDValue Rnd = Op.getOperand(5);
>> + if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
>> + X86::STATIC_ROUNDING::CUR_DIRECTION)
>> + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
>> + dl, Op.getValueType(),
>> + Src1, Src2, Src3, Rnd),
>> + Mask, Src1, Subtarget, DAG);
>> + }
>> + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
>> + dl, Op.getValueType(),
>> + Src1, Src2, Src3),
>> + Mask, Src1, Subtarget, DAG);
>> }
>> case CMP_MASK:
>> case CMP_MASK_CC: {
>> @@ -17215,16 +17201,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>> return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
>> Op.getOperand(2));
>> }
>> - case FMA_OP_MASK:
>> - {
>> - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
>> - dl, Op.getValueType(),
>> - Op.getOperand(1),
>> - Op.getOperand(2),
>> - Op.getOperand(3)),
>> - Op.getOperand(4), Op.getOperand(1),
>> - Subtarget, DAG);
>> - }
>> default:
>> break;
>> }
>> @@ -17395,58 +17371,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>> SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
>> return DAG.getNode(Opcode, dl, VTs, NewOps); }
>> -
>> - case Intrinsic::x86_fma_mask_vfmadd_ps_512:
>> - case Intrinsic::x86_fma_mask_vfmadd_pd_512:
>> - case Intrinsic::x86_fma_mask_vfmsub_ps_512:
>> - case Intrinsic::x86_fma_mask_vfmsub_pd_512:
>> - case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
>> - case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
>> - case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
>> - case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
>> - case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
>> - case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
>> - case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
>> - case Intrinsic::x86_fma_mask_vfmsubadd_pd_512: {
>> - auto *SAE = cast<ConstantSDNode>(Op.getOperand(5));
>> - if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION)
>> - return getVectorMaskingNode(DAG.getNode(getOpcodeForFMAIntrinsic(IntNo),
>> - dl, Op.getValueType(),
>> - Op.getOperand(1),
>> - Op.getOperand(2),
>> - Op.getOperand(3)),
>> - Op.getOperand(4), Op.getOperand(1),
>> - Subtarget, DAG);
>> - else
>> - return SDValue();
>> - }
>> -
>> - case Intrinsic::x86_fma_vfmadd_ps:
>> - case Intrinsic::x86_fma_vfmadd_pd:
>> - case Intrinsic::x86_fma_vfmsub_ps:
>> - case Intrinsic::x86_fma_vfmsub_pd:
>> - case Intrinsic::x86_fma_vfnmadd_ps:
>> - case Intrinsic::x86_fma_vfnmadd_pd:
>> - case Intrinsic::x86_fma_vfnmsub_ps:
>> - case Intrinsic::x86_fma_vfnmsub_pd:
>> - case Intrinsic::x86_fma_vfmaddsub_ps:
>> - case Intrinsic::x86_fma_vfmaddsub_pd:
>> - case Intrinsic::x86_fma_vfmsubadd_ps:
>> - case Intrinsic::x86_fma_vfmsubadd_pd:
>> - case Intrinsic::x86_fma_vfmadd_ps_256:
>> - case Intrinsic::x86_fma_vfmadd_pd_256:
>> - case Intrinsic::x86_fma_vfmsub_ps_256:
>> - case Intrinsic::x86_fma_vfmsub_pd_256:
>> - case Intrinsic::x86_fma_vfnmadd_ps_256:
>> - case Intrinsic::x86_fma_vfnmadd_pd_256:
>> - case Intrinsic::x86_fma_vfnmsub_ps_256:
>> - case Intrinsic::x86_fma_vfnmsub_pd_256:
>> - case Intrinsic::x86_fma_vfmaddsub_ps_256:
>> - case Intrinsic::x86_fma_vfmaddsub_pd_256:
>> - case Intrinsic::x86_fma_vfmsubadd_ps_256:
>> - case Intrinsic::x86_fma_vfmsubadd_pd_256:
>> - return DAG.getNode(getOpcodeForFMAIntrinsic(IntNo), dl, Op.getValueType(),
>> - Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
>> }
>> }
>>
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelL
>> owering.h?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Jan 28 04:21:27
>> +++ 2015
>> @@ -378,6 +378,13 @@ namespace llvm {
>> FNMSUB,
>> FMADDSUB,
>> FMSUBADD,
>> + // FMA with rounding mode
>> + FMADD_RND,
>> + FNMADD_RND,
>> + FMSUB_RND,
>> + FNMSUB_RND,
>> + FMADDSUB_RND,
>> + FMSUBADD_RND,
>>
>> // Compress and expand
>> COMPRESS,
>>
>> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr
>> AVX512.td?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Jan 28 04:21:27
>> +++ 2015
>> @@ -3582,6 +3582,24 @@ multiclass avx512_fma3p_rm<bits<8> opc, } } //
>> Constraints = "$src1 = $dst"
>>
>> +let Constraints = "$src1 = $dst" in { // Omitting the parameter
>> +OpNode (= null_frag) disables ISel pattern matching.
>> +multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
>> + SDPatternOperator OpNode> {
>> + defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
>> + (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
>> + OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
>> + (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
>> + AVX512FMA3Base, EVEX_B, EVEX_RC; } } // Constraints =
>> +"$src1 = $dst"
>> +
>> +multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
>> + X86VectorVTInfo VTI, SDPatternOperator
>> +OpNode> {
>> + defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
>> + VTI, OpNode>, EVEX_CD8<VTI.EltSize,
>> +CD8VF>; }
>> +
>> multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
>> string OpcodeStr, X86VectorVTInfo VTI,
>> SDPatternOperator OpNode> { @@ -3594,10
>> +3612,13 @@ multiclass avx512_fma3p_forms<bits<8> op
>>
>> multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
>> string OpcodeStr,
>> - SDPatternOperator OpNode> {
>> + SDPatternOperator OpNode,
>> + SDPatternOperator OpNodeRnd> {
>> let ExeDomain = SSEPackedSingle in {
>> defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>> - v16f32_info, OpNode>, EVEX_V512;
>> + v16f32_info, OpNode>,
>> + avx512_fma3_round_forms<opc213, OpcodeStr,
>> + v16f32_info, OpNodeRnd>,
>> + EVEX_V512;
>> defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>> v8f32x_info, OpNode>, EVEX_V256;
>> defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>> @@ -3605,7 +3626,9 @@ let ExeDomain = SSEPackedSingle in { } let
>> ExeDomain = SSEPackedDouble in {
>> defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>> - v8f64_info, OpNode>, EVEX_V512, VEX_W;
>> + v8f64_info, OpNode>,
>> + avx512_fma3_round_forms<opc213, OpcodeStr,
>> + v8f64_info, OpNodeRnd>,
>> + EVEX_V512, VEX_W;
>> defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>> v4f64x_info, OpNode>, EVEX_V256, VEX_W;
>> defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
>> @@ -3613,12 +3636,12 @@ let ExeDomain = SSEPackedDouble in { } }
>>
>> -defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>;
>> -defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>;
>> -defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>;
>> -defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>;
>> -defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>;
>> -defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>;
>> +defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
>> +defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
>> +defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub,
>> +X86FmaddsubRnd>; defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
>> +defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
>> +defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
>>
>> let Constraints = "$src1 = $dst" in {
>> multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode
>> OpNode,
>>
>> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr
>> FragmentsSIMD.td?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Jan 28
>> +++ 04:21:27 2015
>> @@ -203,6 +203,8 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTC
>>
>> def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
>> SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
>> +def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
>> + SDTCisSameAs<1,2>, SDTCisSameAs<1,3>,
>> +SDTCisInt<4>]>;
>> def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
>> SDTCisVec<0>, SDTCisInt<2>]>; def
>> STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
>> @@ -265,6 +267,13 @@ def X86Fnmsub : SDNode<"X86ISD::FNMSU
>> def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>; def
>> X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
>>
>> +def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound>;
>> +def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound>;
>> +def X86FmsubRnd : SDNode<"X86ISD::FMSUB_RND", SDTFmaRound>;
>> +def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound>;
>> +def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound>;
>> +def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound>;
>> +
>> def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
>> def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
>> def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
>>
>> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Intri
>> nsicsInfo.h?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Jan 28 04:21:27
>> +++ 2015
>> @@ -398,30 +398,78 @@ static const IntrinsicData IntrinsicsWi
>> X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP,
>> X86ISD::VPERM2X128, 0), X86_INTRINSIC_DATA(avx_vperm2f128_ps_256,
>> INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
>> X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP,
>> X86ISD::VPERM2X128, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK,
>> X86ISD::FMADD, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK,
>> X86ISD::FMADD, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK,
>> X86ISD::FMADD, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK,
>> X86ISD::FMADD, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
>> + X86ISD::FMADD_RND),
>> + X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
>> + X86ISD::FMADD_RND),
>> X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK,
>> X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256,
>> FMA_OP_MASK, X86ISD::FMADDSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
>> + X86ISD::FMADDSUB_RND),
>> X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK,
>> X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256,
>> FMA_OP_MASK, X86ISD::FMADDSUB, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK,
>> X86ISD::FMSUB, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK,
>> X86ISD::FMSUB, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK,
>> X86ISD::FMSUB, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK,
>> X86ISD::FMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
>> + X86ISD::FMADDSUB_RND),
>> + X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512, FMA_OP_MASK, X86ISD::FMSUB,
>> + X86ISD::FMSUB_RND),
>> + X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512, FMA_OP_MASK, X86ISD::FMSUB,
>> + X86ISD::FMSUB_RND),
>> X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK,
>> X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256,
>> FMA_OP_MASK, X86ISD::FMSUBADD, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
>> + X86ISD::FMSUBADD_RND),
>> X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK,
>> X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256,
>> FMA_OP_MASK, X86ISD::FMSUBADD, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK,
>> X86ISD::FNMADD, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK,
>> X86ISD::FNMADD, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK,
>> X86ISD::FNMADD, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK,
>> X86ISD::FNMADD, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK,
>> X86ISD::FNMSUB, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK,
>> X86ISD::FNMSUB, 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK,
>> X86ISD::FNMSUB , 0),
>> - X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK,
>> X86ISD::FNMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
>> + X86ISD::FMSUBADD_RND),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
>> + X86ISD::FNMADD_RND),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
>> + X86ISD::FNMADD_RND),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
>> + X86ISD::FNMSUB_RND),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
>> + X86ISD::FNMSUB_RND),
>> + X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfmsub_pd, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfmsubadd_ps_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfnmadd_pd, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
>> + X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
>> + X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
>> X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
>> X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
>> X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),
>>
>> Modified: llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512
>> -fma-intrinsics.ll?rev=227303&r1=227302&r2=227303&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll Wed Jan 28
>> +++ 04:21:27 2015
>> @@ -182,3 +182,283 @@ define <8 x double> @test_mask_vfmsubadd ret <8
>> x double> %res }
>>
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
>> + ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
>> + ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
>> + ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
>> + ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
>> + ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding:
>> +[0x62,0xf2,0x75,0x49,0xa8,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
>> + ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0x75,0x18,0xa8,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
>> + ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0x75,0x38,0xa8,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
>> + ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0x75,0x58,0xa8,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
>> + ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0x75,0x78,0xa8,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
>> + ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0x75,0x48,0xa8,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rne(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rne
>> + ; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0x75,0x19,0xaa,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtn(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtn
>> + ; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0x75,0x39,0xaa,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtp(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtp
>> + ; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0x75,0x59,0xaa,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtz(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtz
>> + ; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0x75,0x79,0xaa,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrb_current(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_current
>> + ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding:
>> +[0x62,0xf2,0x75,0x49,0xaa,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rne(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rne
>> + ; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0x75,0x18,0xaa,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtn(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtn
>> + ; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0x75,0x38,0xaa,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtp(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtp
>> + ; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0x75,0x58,0xaa,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtz(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtz
>> + ; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0x75,0x78,0xaa,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_current(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_current
>> + ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0x75,0x48,0xaa,0xc2]
>> + %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x
>> +float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4)
>> +nounwind
>> + ret <16 x float> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
>> + ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
>> + ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
>> + ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
>> + ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
>> + ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding:
>> +[0x62,0xf2,0xf5,0x49,0xa8,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
>> + ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0xf5,0x18,0xa8,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
>> + ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0xf5,0x38,0xa8,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
>> + ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0xf5,0x58,0xa8,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
>> + ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0xf5,0x78,0xa8,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
>> + ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0xf5,0x48,0xa8,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
>> + ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
>> + ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
>> + ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
>> + ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ##
>> +encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
>> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
>> + ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding:
>> +[0x62,0xf2,0xf5,0x49,0xae,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
>> + ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0xf5,0x18,0xae,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
>> + ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0xf5,0x38,0xae,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
>> + ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0xf5,0x58,0xae,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
>> + ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0xf5,0x78,0xae,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>> +
>> +define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2) {
>> + ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
>> + ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding:
>> +[0x62,0xf2,0xf5,0x48,0xae,0xc2]
>> + %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x
>> +double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4)
>> +nounwind
>> + ret <8 x double> %res
>> +}
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
> ---------------------------------------------------------------------
> Intel Israel (74) Limited
>
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
>
More information about the llvm-commits mailing list