[llvm] r229837 - AVX-512: Full implementation for VRNDSCALESS/SD instructions and intrinsics.
Eric Christopher
echristo at gmail.com
Thu Feb 19 16:47:46 PST 2015
Hi Elena,
The patterns in this patch, and many others in the file, don't have Predicates
that stop the instructions from being generated on targets without AVX-512.
Example from your testcase:
dzur:~/tmp> ~/builds/build-llvm/Debug+Asserts/bin/llc avx512-round.ll
-mcpu=corei7-avx -filetype=asm -o - | grep vrndscaless
I'm in the process of trying to fix it with a testcase, but here's a patch
that fixes it (at least in part) for this set of instructions.
I've gone ahead and reverted the patch in r229942. Could you please audit
the file to make sure all of the patterns have predicates?
Thanks!
-eric
On Thu Feb 19 2015 at 2:52:24 AM Elena Demikhovsky <
elena.demikhovsky at intel.com> wrote:
> Author: delena
> Date: Thu Feb 19 04:48:04 2015
> New Revision: 229837
>
> URL: http://llvm.org/viewvc/llvm-project?rev=229837&view=rev
> Log:
> AVX-512: Full implementation for VRNDSCALESS/SD instructions and
> intrinsics.
>
>
> Modified:
> llvm/trunk/include/llvm/IR/IntrinsicsX86.td
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.h
> llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
> llvm/trunk/test/CodeGen/X86/avx512-round.ll
>
> Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/
> llvm/IR/IntrinsicsX86.td?rev=229837&r1=229836&r2=229837&view=diff
> ============================================================
> ==================
> --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
> +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Thu Feb 19 04:48:04 2015
> @@ -3193,12 +3193,14 @@ let TargetPrefix = "x86" in { // All in
> Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
> llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
> [IntrNoMem]>;
>
> - def int_x86_avx512_rndscale_ss : GCCBuiltin<"__builtin_ia32_
> rndscaless">,
> - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
> - llvm_i32_ty], [IntrNoMem]>;
> - def int_x86_avx512_rndscale_sd : GCCBuiltin<"__builtin_ia32_
> rndscalesd">,
> - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
> - llvm_i32_ty], [IntrNoMem]>;
> + def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_
> rndscaless_mask">,
> + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
> llvm_v4f32_ty,
> + llvm_i8_ty, llvm_i32_ty,
> llvm_i32_ty],
> + [IntrNoMem]>;
> + def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_
> rndscalesd_mask">,
> + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
> llvm_v2f64_ty,
> + llvm_i8_ty, llvm_i32_ty,
> llvm_i32_ty],
> + [IntrNoMem]>;
> def int_x86_avx512_sqrt_ss : GCCBuiltin<"__builtin_ia32_
> sqrtrndss">,
> Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
> [IntrNoMem]>;
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
> X86/X86ISelLowering.cpp?rev=229837&r1=229836&r2=229837&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Feb 19 04:48:04 2015
> @@ -17523,9 +17523,20 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
> SDValue Src2 = Op.getOperand(2);
> SDValue Src0 = Op.getOperand(3);
> SDValue Mask = Op.getOperand(4);
> - SDValue RoundingMode = Op.getOperand(5);
> + // There are 2 kinds of intrinsics in this group:
> + // (1) With supress-all-exceptions (sae) - 6 operands
> + // (2) With rounding mode and sae - 7 operands.
> + if (Op.getNumOperands() == 6) {
> + SDValue Sae = Op.getOperand(5);
> + return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
> Src1, Src2,
> + Sae),
> + Mask, Src0, Subtarget, DAG);
> + }
> + assert(Op.getNumOperands() == 7 && "Unexpected intrinsic form");
> + SDValue RoundingMode = Op.getOperand(5);
> + SDValue Sae = Op.getOperand(6);
> return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
> Src1, Src2,
> - RoundingMode),
> + RoundingMode, Sae),
> Mask, Src0, Subtarget, DAG);
> }
> case INTR_TYPE_2OP_MASK: {
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
> X86/X86ISelLowering.h?rev=229837&r1=229836&r2=229837&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Thu Feb 19 04:48:04 2015
> @@ -393,7 +393,8 @@ namespace llvm {
> FMSUB_RND,
> FNMSUB_RND,
> FMADDSUB_RND,
> - FMSUBADD_RND,
> + FMSUBADD_RND,
> + RNDSCALE,
>
> // Compress and expand
> COMPRESS,
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
> X86/X86InstrAVX512.td?rev=229837&r1=229836&r2=229837&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Feb 19 04:48:04 2015
> @@ -86,6 +86,8 @@ class X86VectorVTInfo<int numelts, Value
> !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
> SSEPackedInt));
>
> + RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
> +
> // A vector type of the same width with element type i32. This is used
> to
> // create the canonical constant zero node ImmAllZerosV.
> ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
> @@ -4637,7 +4639,6 @@ let ExeDomain = d in {
> } // ExeDomain
> }
>
> -
> defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
> loadv16f32, SSEPackedSingle>, EVEX_V512,
> EVEX_CD8<32, CD8VF>;
> @@ -4657,52 +4658,69 @@ def : Pat<(v8f64 (int_x86_avx512_mask_rn
> FROUND_CURRENT)),
> (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
>
> -multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
> - Operand x86memop, RegisterClass RC, Domain d> {
> -let ExeDomain = d in {
> - def r : AVX512AIi8<opc, MRMSrcReg,
> - (outs RC:$dst), (ins RC:$src1, RC:$src2,
> i32u8imm:$src3),
> - !strconcat(OpcodeStr,
> - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> - []>, EVEX_4V;
> -
> - def m : AVX512AIi8<opc, MRMSrcMem,
> - (outs RC:$dst), (ins RC:$src1, x86memop:$src2,
> i32u8imm:$src3),
> - !strconcat(OpcodeStr,
> - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> - []>, EVEX_4V;
> -} // ExeDomain
> -}
> +multiclass
> +avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _>
> {
>
> -defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem,
> FR32X,
> - SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
> + let ExeDomain = _.ExeDomain in {
> + defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
> + (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
> OpcodeStr,
> + "$src3, $src2, $src1", "$src1, $src2, $src3",
> + (_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT
> _.RC:$src2),
> + (i32 imm:$src3), (i32 FROUND_CURRENT)))>;
>
> -defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem,
> FR64X,
> - SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
> + defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
> + (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
> OpcodeStr,
> + "$src3, $src2, $src1", "$src1, $src2, $src3",
> + (_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT
> _.RC:$src2),
> + (i32 imm:$src3), (i32 FROUND_NO_EXC))),
> "{sae}">, EVEX_B;
>
> -let Predicates = [HasAVX512] in {
> - def : Pat<(ffloor FR32X:$src),
> - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
> - def : Pat<(f64 (ffloor FR64X:$src)),
> - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
> - def : Pat<(f32 (fnearbyint FR32X:$src)),
> - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
> - def : Pat<(f64 (fnearbyint FR64X:$src)),
> - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
> - def : Pat<(f32 (fceil FR32X:$src)),
> - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
> - def : Pat<(f64 (fceil FR64X:$src)),
> - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
> - def : Pat<(f32 (frint FR32X:$src)),
> - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
> - def : Pat<(f64 (frint FR64X:$src)),
> - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
> - def : Pat<(f32 (ftrunc FR32X:$src)),
> - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
> - def : Pat<(f64 (ftrunc FR64X:$src)),
> - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
> + let mayLoad = 1 in
> + defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
> + (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
> OpcodeStr,
> + "$src3, $src2, $src1", "$src1, $src2, $src3",
> + (_.VT (X86RndScale (_.VT _.RC:$src1),
> + (_.VT (scalar_to_vector (_.ScalarLdFrag
> addr:$src2))),
> + (i32 imm:$src3), (i32 FROUND_CURRENT)))>;
> + }
> + def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
> + (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
> + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))),
> _.FRC)>;
> + def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
> + (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
> + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x2))),
> _.FRC)>;
> + def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
> + (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
> + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x3))),
> _.FRC)>;
> + def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
> + (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
> + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))),
> _.FRC)>;
> + def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS
> + (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
> + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))),
> _.FRC)>;
> +
> + def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
> + (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
> + addr:$src, (i32 0x1))), _.FRC)>;
> + def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
> + (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
> + addr:$src, (i32 0x2))), _.FRC)>;
> + def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
> + (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
> + addr:$src, (i32 0x3))), _.FRC)>;
> + def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
> + (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
> + addr:$src, (i32 0x4))), _.FRC)>;
> + def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
> + (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
> + addr:$src, (i32 0xc))), _.FRC)>;
> }
>
> +defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
> + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32,
> CD8VT1>;
> +
> +defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd",
> f64x_info>, VEX_W,
> + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64,
> CD8VT1>;
> +
> def : Pat<(v16f32 (ffloor VR512:$src)),
> (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
> def : Pat<(v16f32 (fnearbyint VR512:$src)),
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
> X86/X86InstrFragmentsSIMD.td?rev=229837&r1=229836&r2=229837&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Thu Feb 19
> 04:48:04 2015
> @@ -223,6 +223,8 @@ def STDFp1SrcRm : SDTypeProfile<1, 2, [S
> SDTCisVec<0>, SDTCisInt<2>]>;
> def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
> SDTCisVec<0>, SDTCisInt<3>]>;
> +def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
> + SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>;
>
> def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
> def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
> @@ -299,6 +301,7 @@ def X86exp2 : SDNode<"X86ISD::EXP2"
>
> def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
> def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
> +def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
>
> def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1,
> i32>,
> SDTCisVT<2, v16i8>, SDTCisVT<3,
> v16i8>,
>
> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
> X86/X86IntrinsicsInfo.h?rev=229837&r1=229836&r2=229837&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Thu Feb 19 04:48:04 2015
> @@ -378,6 +378,10 @@ static const IntrinsicData IntrinsicsWi
> X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK,
> X86ISD::VSRLI, 0),
> X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK,
> ISD::SRL, 0),
> X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK,
> ISD::SRL, 0),
> + X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
> + X86ISD::RNDSCALE, 0),
> + X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
> + X86ISD::RNDSCALE, 0),
> X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK,
> ISD::FSUB,
> X86ISD::FSUB_RND),
> X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK,
> ISD::FSUB,
> @@ -396,8 +400,8 @@ static const IntrinsicData IntrinsicsWi
> X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC,
> X86ISD::CMPMU, 0),
> X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28,
> 0),
> X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28,
> 0),
> - X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28,
> 0),
> - X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28,
> 0),
> + X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM,
> X86ISD::RCP28, 0),
> + X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM,
> X86ISD::RCP28, 0),
> X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28,
> 0),
> X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28,
> 0),
> X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28,
> 0),
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/X86/avx512-intrinsics.ll?rev=229837&r1=229836&r2=229837&view=diff
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Thu Feb 19 04:48:04
> 2015
> @@ -68,6 +68,14 @@ define <8 x double> @test7(<8 x double>
> ret <8 x double>%res
> }
>
> +declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2
> x double>, <2 x double>, i8, i32, i32)
> +
> +define <2 x double> @test_rndsc_sd(<2 x double> %a, <2 x double> %b, <2 x
> double> %c) {
> +; CHECK: vrndscalesd $11, %xmm{{.*}} {%k1} ## encoding:
> [0x62,0xf3,0xfd,0x09,0x0b,0xd1,0x0b]
> + %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x
> double> %a, <2 x double> %b, <2 x double> %c, i8 5, i32 11, i32 4)
> + ret <2 x double>%res
> +}
> +
> declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>,
> i32, <16 x float>, i16, i32)
>
> define <16 x float> @test8(<16 x float> %a) {
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-round.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/X86/avx512-round.ll?rev=229837&r1=229836&r2=229837&view=diff
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/X86/avx512-round.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-round.ll Thu Feb 19 04:48:04 2015
> @@ -79,3 +79,28 @@ define <8 x double> @nearbyint_v8f64(<8
> ret <8 x double> %res
> }
> declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
> +
> +define double @nearbyint_f64(double %a) {
> +; CHECK-LABEL: nearbyint_f64
> +; CHECK: vrndscalesd $12, {{.*}}encoding: [0x62,0xf3,0xfd,0x08,0x0b,
> 0xc0,0x0c]
> + %res = call double @llvm.nearbyint.f64(double %a)
> + ret double %res
> +}
> +declare double @llvm.nearbyint.f64(double %p)
> +
> +define float @floor_f32(float %a) {
> +; CHECK-LABEL: floor_f32
> +; CHECK: vrndscaless $1, {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,
> 0xc0,0x01]
> + %res = call float @llvm.floor.f32(float %a)
> + ret float %res
> +}
> +declare float @llvm.floor.f32(float %p)
> +
> +define float @floor_f32m(float* %aptr) {
> +; CHECK-LABEL: floor_f32m
> +; CHECK: vrndscaless $1, (%rdi), {{.*}}encoding:
> [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x01]
> + %a = load float* %aptr, align 4
> + %res = call float @llvm.floor.f32(float %a)
> + ret float %res
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150220/802d17f8/attachment.html>
-------------- next part --------------
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 9d20922..4aa76c5 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -4658,20 +4658,23 @@ defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
loadv16f32, SSEPackedSingle>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
+let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
imm:$src2, (v16f32 VR512:$src1), (i16 -1),
FROUND_CURRENT)),
(VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
-
+}
defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
loadv8f64, SSEPackedDouble>, EVEX_V512,
VEX_W, EVEX_CD8<64, CD8VF>;
+let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
imm:$src2, (v8f64 VR512:$src1), (i8 -1),
FROUND_CURRENT)),
(VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
+}
multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
@@ -4697,6 +4700,7 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
}
+let Predicates = [HasAVX512] in {
def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))), _.FRC)>;
@@ -4729,13 +4733,14 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0xc))), _.FRC)>;
}
+}
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
-
+let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (ffloor VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
@@ -4757,7 +4762,7 @@ def : Pat<(v8f64 (frint VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
-
+}
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
More information about the llvm-commits
mailing list