[llvm] r350994 - [X86] Add ISD node for masked version of CVTPS2PH.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 12 00:05:12 PST 2019
Author: ctopper
Date: Sat Jan 12 00:05:12 2019
New Revision: 350994
URL: http://llvm.org/viewvc/llvm-project?rev=350994&view=rev
Log:
[X86] Add ISD node for masked version of CVTPS2PH.
The 128-bit input produces 64-bits of output and fills the upper 64-bits with 0. The mask only applies to the lower elements. But we can't represent this with a vselect like we normally do.
This also avoids the need to have a special X86ISD::SELECT when avx512bw isn't enabled since vselect v8i16 isn't legal there.
Fixes another instruction for PR34877.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=350994&r1=350993&r2=350994&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 12 00:05:12 2019
@@ -21383,12 +21383,6 @@ static SDValue getVectorMaskingNode(SDVa
case X86ISD::VPSHUFBITQMB:
case X86ISD::VFPCLASS:
return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
- case X86ISD::CVTPS2PH:
- // We can't use ISD::VSELECT here because it is not always "Legal"
- // for the destination type. For example vpmovqb require only AVX512
- // and vselect that can operate on byte element type require BWI
- OpcodeSelect = X86ISD::SELECT;
- break;
}
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
@@ -22068,9 +22062,7 @@ SDValue X86TargetLowering::LowerINTRINSI
SDValue Mask = Op.getOperand(3);
if (isAllOnesConstant(Mask))
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl,
- Op.getValueType(), Src),
- Mask, PassThru, Subtarget, DAG);
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
MVT SrcVT = Src.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
@@ -22078,6 +22070,22 @@ SDValue X86TargetLowering::LowerINTRINSI
return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
Mask);
}
+ case CVTPS2PH_MASK: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Rnd = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+
+ if (isAllOnesConstant(Mask))
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src, Rnd);
+
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd,
+ PassThru, Mask);
+
+ }
default:
break;
}
@@ -27365,6 +27373,7 @@ const char *X86TargetLowering::getTarget
case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND";
case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
+ case X86ISD::MCVTPS2PH: return "X86ISD::MCVTPS2PH";
case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
case X86ISD::CVTPH2PS_RND: return "X86ISD::CVTPH2PS_RND";
case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=350994&r1=350993&r2=350994&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat Jan 12 00:05:12 2019
@@ -556,6 +556,10 @@ namespace llvm {
// Conversions between float and half-float.
CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
+ // Masked version of above.
+ // SRC, RND, PASSTHRU, MASK
+ MCVTPS2PH,
+
// Galois Field Arithmetic Instructions
GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=350994&r1=350993&r2=350994&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Jan 12 00:05:12 2019
@@ -8726,12 +8726,28 @@ let Predicates = [HasVLX] in {
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
- defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
- (ins _src.RC:$src1, i32u8imm:$src2),
- "vcvtps2ph", "$src2, $src1", "$src1, $src2",
- (X86cvtps2ph (_src.VT _src.RC:$src1),
- (i32 imm:$src2)), 0, 0>,
- AVX512AIi8Base, Sched<[RR]>;
+let ExeDomain = GenericDomain in {
+ def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
+ Sched<[RR]>;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+ _dest.RC:$src0, _src.KRCWM:$mask))]>,
+ Sched<[RR]>, EVEX_K;
+ def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+ _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
+ Sched<[RR]>, EVEX_KZ;
let hasSideEffects = 0, mayStore = 1 in {
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
@@ -8743,6 +8759,7 @@ multiclass avx512_cvtps2ph<X86VectorVTIn
EVEX_K, Sched<[MR]>, NotMemoryFoldable;
}
}
+}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
SchedWrite Sched> {
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=350994&r1=350993&r2=350994&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat Jan 12 00:05:12 2019
@@ -595,6 +595,13 @@ def X86cvtps2ph : SDNode<"X86ISD::CVTP
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
SDTCisVT<2, i32>]> >;
+def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
+ SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisVT<2, i32>,
+ SDTCisSameAs<0, 3>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisSameNumEltsAs<1, 4>]> >;
def X86vfpextRnd : SDNode<"X86ISD::VFPEXT_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>,
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=350994&r1=350993&r2=350994&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat Jan 12 00:05:12 2019
@@ -32,7 +32,7 @@ enum IntrinsicType : uint16_t {
IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG,
- TRUNCATE_TO_REG,
+ TRUNCATE_TO_REG, CVTPS2PH_MASK,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, GATHER_AVX2,
@@ -838,12 +838,12 @@ static const IntrinsicData IntrinsicsWi
X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_128, CMP_MASK,
X86ISD::VPSHUFBITQMB, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=350994&r1=350993&r2=350994&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat Jan 12 00:05:12 2019
@@ -990,8 +990,8 @@ define <16 x i16> @test_x86_vcvtps2ph_25
; CHECK-LABEL: test_x86_vcvtps2ph_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1
; CHECK-NEXT: vcvtps2ph $2, %zmm0, (%rsi)
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=350994&r1=350993&r2=350994&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Sat Jan 12 00:05:12 2019
@@ -4306,21 +4306,21 @@ define <8 x i16> @test_x86_vcvtps2ph_128
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
+; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x02]
; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x02]
-; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc2,0x02]
-; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
-; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x02]
-; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
+; X86-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
+; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtps2ph_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
+; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x02]
; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x02]
-; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc2,0x02]
-; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
-; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x02]
-; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
+; X64-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
+; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 %mask)
@@ -4337,22 +4337,22 @@ define <8 x i16> @test_x86_vcvtps2ph_256
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
+; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x02]
; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x02]
-; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc2,0x02]
-; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
-; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x02]
-; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
+; X86-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
+; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtps2ph_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
+; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x02]
; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x02]
-; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc2,0x02]
-; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
-; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x02]
-; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
+; X64-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
+; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
More information about the llvm-commits
mailing list