[llvm] r229645 - AVX-512: Added support for FP instructions with embedded rounding mode.
Elena Demikhovsky
elena.demikhovsky at intel.com
Tue Feb 17 23:59:22 PST 2015
Author: delena
Date: Wed Feb 18 01:59:20 2015
New Revision: 229645
URL: http://llvm.org/viewvc/llvm-project?rev=229645&view=rev
Log:
AVX-512: Added support for FP instructions with embedded rounding mode.
By Asaf Badouh <asaf.badouh at intel.com>
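
For orientation, here is a minimal IR-level sketch of how the new masked intrinsics are meant to be called. It is illustrative only and not part of the commit; the operand layout (two sources, a pass-through vector, a mask, and a trailing i32 rounding-mode immediate) follows the declarations and tests added below, and the function name is made up.

declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>,
                                                      <16 x float>, i16, i32)

; Hypothetical example: unmasked add with an explicit rounding mode.
define <16 x float> @add_ps_rn_sketch(<16 x float> %a, <16 x float> %b) {
  ; i16 -1 enables all lanes; i32 0 selects round-to-nearest, matching the
  ; {rn-sae} forms checked in the new avx512-intrinsics.ll tests.
  %r = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a,
              <16 x float> %b, <16 x float> zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %r
}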
Modified:
llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
Modified: llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h?rev=229645&r1=229644&r2=229645&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h Wed Feb 18 01:59:20 2015
@@ -696,7 +696,7 @@ namespace ISD {
/// which do not reference a specific memory location should be less than
/// this value. Those that do must not be less than this value, and can
/// be used with SelectionDAG::getMemIntrinsicNode.
- static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+180;
+ static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+200;
//===--------------------------------------------------------------------===//
/// MemIndexedMode enum - This enum defines the load / store indexed
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=229645&r1=229644&r2=229645&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Wed Feb 18 01:59:20 2015
@@ -3155,6 +3155,31 @@ let TargetPrefix = "x86" in { // All in
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+
+ def int_x86_avx512_mask_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=229645&r1=229644&r2=229645&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Feb 18 01:59:20 2015
@@ -17595,22 +17595,26 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
Mask, Src0, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK: {
- SDValue Mask = Op.getOperand(4);
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ // We specify 2 possible opcodes for intrinsics with rounding modes.
+ // First, we check if the intrinsic may have non-default rounding mode,
+ // (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
- unsigned Round = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ SDValue Rnd = Op.getOperand(5);
+ unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2),
- Op.getOperand(3), Op.getOperand(5)),
+ Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
}
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- Op.getOperand(1),
- Op.getOperand(2)),
+ Src1,Src2),
Mask, PassThru, Subtarget, DAG);
}
case FMA_OP_MASK: {
@@ -17618,6 +17622,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
+ // We specify 2 possible opcodes for intrinsics with rounding modes.
+ // First, we check if the intrinsic may have non-default rounding mode,
+ // (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
@@ -20585,6 +20592,10 @@ const char *X86TargetLowering::getTarget
case X86ISD::ADDSUB: return "X86ISD::ADDSUB";
case X86ISD::RCP28: return "X86ISD::RCP28";
case X86ISD::RSQRT28: return "X86ISD::RSQRT28";
+ case X86ISD::FADD_RND: return "X86ISD::FADD_RND";
+ case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
+ case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
+ case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
}
}
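
To make the INTR_TYPE_2OP_MASK dispatch above concrete, a hedged sketch follows: with a static rounding immediate the call is lowered through the new rounding opcode (Opc1), while an immediate equal to X86::STATIC_ROUNDING::CUR_DIRECTION (assumed here to be the value 4, defined elsewhere in the backend) keeps the pre-existing generic path (Opc0) and rounds according to MXCSR. Function and value names are invented for illustration.

declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>,
                                                      <16 x float>, i16, i32)

define <16 x float> @add_ps_dispatch_sketch(<16 x float> %a, <16 x float> %b) {
  ; i32 2 is a static rounding mode ({ru-sae}): lowered via X86ISD::FADD_RND.
  %static  = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a,
                 <16 x float> %b, <16 x float> zeroinitializer, i16 -1, i32 2)
  ; i32 4 is assumed to be CUR_DIRECTION: lowered via the plain ISD::FADD path.
  %dynamic = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a,
                 <16 x float> %b, <16 x float> zeroinitializer, i16 -1, i32 4)
  %sum = fadd <16 x float> %static, %dynamic
  ret <16 x float> %sum
}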
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=229645&r1=229644&r2=229645&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Feb 18 01:59:20 2015
@@ -201,7 +201,12 @@ namespace llvm {
/// ADDSUB - Combined add and sub on an FP vector.
ADDSUB,
-
+ // FADD, FSUB, FMUL, FDIV, FMIN, FMAX - FP vector ops with rounding mode.
+ FADD_RND,
+ FSUB_RND,
+ FMUL_RND,
+ FDIV_RND,
+
// SUBUS - Integer sub with unsigned saturation.
SUBUS,
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=229645&r1=229644&r2=229645&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Feb 18 01:59:20 2015
@@ -3269,7 +3269,16 @@ multiclass avx512_fp_packed<bits<8> opc,
}//let mayLoad = 1
}
-multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+ X86VectorVTInfo _, bit IsCommutable> {
+ defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
+ EVEX_4V, EVEX_B, EVEX_RC;
+}
+
+multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
IsCommutable>, EVEX_V512, PS,
@@ -3295,12 +3304,23 @@ multiclass avx512_fp_binop_p<bits<8> opc
}
}
-defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>;
-defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>;
+multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
+ defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+ EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+ EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
+}
+
+defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>,
+ avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>,
+ avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>;
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>,
+ avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>,
+ avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>;
-defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>;
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
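
The new avx512_fp_round_packed multiclass adds an "rb" register form whose asm string places the rounding control first ("$rc, $src2, $src1"). As a hedged sketch of the resulting syntax, here is a hypothetical VDIVPD case not covered by the tests below; the assembly in the comment follows that operand order, and no encoding bytes are claimed.

declare <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double>, <8 x double>,
                                                      <8 x double>, i8, i32)

define <8 x double> @div_pd_rd_sketch(<8 x double> %a0, <8 x double> %a1) {
  ; Expected to assemble to something like: vdivpd {rd-sae}, %zmm1, %zmm0, %zmm0
  ; (i32 1 selects {rd-sae}, mirroring the rounding values used in the tests).
  %res = call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a0,
             <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 1)
  ret <8 x double> %res
}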
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=229645&r1=229644&r2=229645&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Feb 18 01:59:20 2015
@@ -212,6 +212,9 @@ def SDTVBroadcastm : SDTypeProfile<1, 2,
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
+def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
+
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
@@ -271,6 +274,11 @@ def X86Blendi : SDNode<"X86ISD::BLEND
def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
+def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
+def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
+def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
+def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
+
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=229645&r1=229644&r2=229645&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Feb 18 01:59:20 2015
@@ -244,6 +244,10 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
+ X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,
+ X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_mask_blend_b_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_512, BLEND, X86ISD::SELECT, 0),
@@ -299,6 +303,10 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_div_pd_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
+ X86ISD::FDIV_RND),
+ X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
+ X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
@@ -323,6 +331,11 @@ static const IntrinsicData IntrinsicsWi
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
+
+ X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
+ X86ISD::FMUL_RND),
+ X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
+ X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
@@ -365,6 +378,10 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
+ X86ISD::FSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
+ X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=229645&r1=229644&r2=229645&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Wed Feb 18 01:59:20 2015
@@ -1439,3 +1439,170 @@ define <8 x i64> @test_x86_avx512_psrlv_
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
+
+declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vsubps_rn
+ ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 0)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vsubps_rd
+ ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vsubps_ru
+ ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vsubps_rz
+ ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 3)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vmulps_rn
+ ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 0)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vmulps_rd
+ ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vmulps_ru
+ ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vmulps_rz
+ ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 3)
+ ret <16 x float> %res
+}
+
+;; mask float
+define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_rn
+ ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 0)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_rd
+ ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_ru
+ ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_rz
+ ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 3)
+ ret <16 x float> %res
+}
+
+;; With Passthru value
+define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_passthru_rn
+ ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 0)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_passthru_rd
+ ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_passthru_ru
+ ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_passthru_rz
+ ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 3)
+ ret <16 x float> %res
+}
+
+;; mask double
+define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_vmulpd_mask_rn
+ ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 0)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_vmulpd_mask_rd
+ ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 1)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_vmulpd_mask_ru
+ ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 2)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_vmulpd_mask_rz
+ ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 3)
+ ret <8 x double> %res
+}