[llvm] r351017 - [X86] Add X86ISD::VMFPROUND to handle the masked case of VCVTPD2PSZ128 which only produces 2 result elements and zeroes the upper elements.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 12 18:59:58 PST 2019
Author: ctopper
Date: Sat Jan 12 18:59:57 2019
New Revision: 351017
URL: http://llvm.org/viewvc/llvm-project?rev=351017&view=rev
Log:
[X86] Add X86ISD::VMFPROUND to handle the masked case of VCVTPD2PSZ128 which only produces 2 result elements and zeroes the upper elements.
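We can't represent this properly with vselect like we normally do, because the masked instruction only produces 2 result elements and zeroes the upper elements of the v4f32 result. We also have to update the instruction definition to use a VK2WM mask instead of VK4WM to represent this.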
Fixes another case from PR34877.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=351017&r1=351016&r2=351017&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 12 18:59:57 2019
@@ -21796,7 +21796,7 @@ SDValue X86TargetLowering::LowerINTRINSI
// does not change the value. Set it to 0 since it can change.
return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
DAG.getIntPtrConstant(0, dl));
- case CVTPD2PS_MASK: {
+ case CVTPD2PS_RND_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
@@ -22058,6 +22058,7 @@ SDValue X86TargetLowering::LowerINTRINSI
SDValue Results[] = { SetCC, Res };
return DAG.getMergeValues(Results, dl);
}
+ case CVTPD2PS_MASK:
case TRUNCATE_TO_REG: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
@@ -27217,6 +27218,7 @@ const char *X86TargetLowering::getTarget
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
+ case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=351017&r1=351016&r2=351017&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat Jan 12 18:59:57 2019
@@ -304,6 +304,10 @@ namespace llvm {
// Vector FP round.
VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
+ // Masked version of above. Used for v2f64->v4f32.
+ // SRC, PASSTHRU, MASK
+ VMFPROUND,
+
// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=351017&r1=351016&r2=351017&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Jan 12 18:59:57 2019
@@ -7969,26 +7969,53 @@ multiclass avx512_vcvt_fp<bits<8> opc, s
X86VectorVTInfo _Src, SDNode OpNode,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
- string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
+ string Alias = "", X86MemOperand MemOp = _Src.MemOp,
+ RegisterClass MaskRC = _.KRCWM> {
- defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
- (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
+ defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src),
+ (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
+ (ins MaskRC:$mask, _Src.RC:$src),
+ OpcodeStr, "$src", "$src",
+ (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+ (vselect MaskRC:$mask,
+ (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
EVEX, Sched<[sched]>;
- defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
+ defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins MemOp:$src),
+ (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
+ (ins MaskRC:$mask, MemOp:$src),
+ OpcodeStr#Alias, "$src", "$src",
(_.VT (OpNode (_Src.VT
- (_Src.LdFrag addr:$src))))>,
+ (_Src.LdFrag addr:$src)))),
+ (vselect MaskRC:$mask,
+ (_.VT (OpNode (_Src.VT
+ (_Src.LdFrag addr:$src)))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
EVEX, Sched<[sched.Folded]>;
- defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _Src.ScalarMemOp:$src), OpcodeStr,
+ defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _Src.ScalarMemOp:$src),
+ (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
+ (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
+ OpcodeStr,
"${src}"##Broadcast, "${src}"##Broadcast,
(_.VT (OpNode (_Src.VT
(X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
- ))>, EVEX, EVEX_B,
- Sched<[sched.Folded]>;
+ )),
+ (vselect MaskRC:$mask,
+ (_.VT
+ (OpNode
+ (_Src.VT
+ (X86VBroadcast
+ (_Src.ScalarLdFrag addr:$src))))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
+ EVEX, EVEX_B, Sched<[sched.Folded]>;
}
// Coversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -8039,7 +8066,8 @@ multiclass avx512_cvtpd2ps<bits<8> opc,
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
- X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -8073,6 +8101,35 @@ let Predicates = [HasVLX] in {
(VCVTPS2PDZ128rm addr:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDZ256rm addr:$src)>;
+
+ // Special patterns to allow use of X86vmfpround for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(X86vfpround (v2f64 VR128X:$src)),
+ (VCVTPD2PSZ128rr VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(X86vfpround (loadv2f64 addr:$src)),
+ (VCVTPD2PSZ128rm addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
+ (VCVTPD2PSZ128rmb addr:$src)>;
+ def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4f32 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4f32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// Convert Signed/Unsigned Doubleword to Double
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=351017&r1=351016&r2=351017&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat Jan 12 18:59:57 2019
@@ -143,6 +143,14 @@ def X86fpextRnd : SDNode<"X86ISD::VFPEX
SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>;
+def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
+ SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCisSameSizeAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>>;
+
def X86vshiftimm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVT<2, i8>, SDTCisInt<0>]>;
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=351017&r1=351016&r2=351017&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat Jan 12 18:59:57 2019
@@ -24,7 +24,7 @@ enum IntrinsicType : uint16_t {
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP_IMM8,
CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
- CVTPD2PS, CVTPD2PS_MASK,
+ CVTPD2PS, CVTPD2PS_MASK, CVTPD2PS_RND_MASK,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK,
@@ -461,10 +461,10 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK,
- X86ISD::VFPROUND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, CVTPD2PS_MASK,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_RND_MASK,
ISD::FP_ROUND, X86ISD::VFPROUND_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0),
More information about the llvm-commits mailing list