[llvm] r317544 - [X86] Add support for using EVEX instructions for the legacy vcvtph2ps intrinsics.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 6 23:13:03 PST 2017
Author: ctopper
Date: Mon Nov 6 23:13:03 2017
New Revision: 317544
URL: http://llvm.org/viewvc/llvm-project?rev=317544&view=rev
Log:
[X86] Add support for using EVEX instructions for the legacy vcvtph2ps intrinsics.
Looks like there's some missed load folding opportunities for i64 loads.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/f16c-intrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=317544&r1=317543&r2=317544&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Nov 6 23:13:03 2017
@@ -25176,6 +25176,7 @@ const char *X86TargetLowering::getTarget
case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
+ case X86ISD::CVTPH2PS_RND: return "X86ISD::CVTPH2PS_RND";
case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND";
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=317544&r1=317543&r2=317544&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Nov 6 23:13:03 2017
@@ -563,7 +563,7 @@ namespace llvm {
RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
// Conversions between float and half-float.
- CVTPS2PH, CVTPH2PS,
+ CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
// LWP insert record.
LWPINS,
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=317544&r1=317543&r2=317544&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Nov 6 23:13:03 2017
@@ -7177,21 +7177,22 @@ def : Pat<(v4f64 (uint_to_fp (v4i64 VR25
//===----------------------------------------------------------------------===//
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, PatFrag ld_frag> {
- defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
- "vcvtph2ps", "$src", "$src",
- (X86cvtph2ps (_src.VT _src.RC:$src),
- (i32 FROUND_CURRENT))>, T8PD;
- defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
- "vcvtph2ps", "$src", "$src",
- (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
- (i32 FROUND_CURRENT))>, T8PD;
+ defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
+ (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
+ (X86cvtph2ps (_src.VT _src.RC:$src))>, T8PD;
+ defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
+ (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
+ (X86cvtph2ps (_src.VT
+ (bitconvert
+ (ld_frag addr:$src))))>, T8PD;
}
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
- defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
- "vcvtph2ps", "{sae}, $src", "$src, {sae}",
- (X86cvtph2ps (_src.VT _src.RC:$src),
- (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
+ defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
+ (ins _src.RC:$src), "vcvtph2ps",
+ "{sae}, $src", "$src, {sae}",
+ (X86cvtph2psRnd (_src.VT _src.RC:$src),
+ (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
}
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=317544&r1=317543&r2=317544&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Mon Nov 6 23:13:03 2017
@@ -591,7 +591,12 @@ def X86VUintToFP : SDNode<"X86ISD::
def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
+
def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
+ SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, i16>]> >;
+
+def X86cvtph2psRnd : SDNode<"X86ISD::CVTPH2PS_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>,
SDTCisVT<2, i32>]> >;
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=317544&r1=317543&r2=317544&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Nov 6 23:13:03 2017
@@ -7685,10 +7685,10 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
-multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> {
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- [(set RC:$dst, (Int VR128:$src))]>,
+ [(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
T8PD, VEX, Sched<[WriteCvtF2F]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
@@ -7710,20 +7710,23 @@ multiclass f16c_ps2ph<RegisterClass RC,
TAPD, VEX;
}
-let Predicates = [HasF16C] in {
- defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
- defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
- defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
- defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
+let Predicates = [HasF16C, NoVLX] in {
+ defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem>;
+ defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L;
// Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(int_x86_vcvtph2ps_128 (vzmovl_v2i64 addr:$src)),
+ def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
(VCVTPH2PSrm addr:$src)>;
- def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
+ def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
(VCVTPH2PSrm addr:$src)>;
- def : Pat<(int_x86_vcvtph2ps_128 (bitconvert
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
(VCVTPH2PSrm addr:$src)>;
+}
+
+let Predicates = [HasF16C] in {
+ defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
+ defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
(int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=317544&r1=317543&r2=317544&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Mon Nov 6 23:13:03 2017
@@ -1071,12 +1071,12 @@ static const IntrinsicData IntrinsicsWi
X86ISD::FSUBS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FSUBS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTPH2PS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM,
- X86ISD::CVTPH2PS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTPH2PS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND),
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK,
X86ISD::CVTPS2PH, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK,
@@ -1586,6 +1586,8 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
+ X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
Modified: llvm/trunk/test/CodeGen/X86/f16c-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/f16c-intrinsics.ll?rev=317544&r1=317543&r2=317544&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/f16c-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/f16c-intrinsics.ll Mon Nov 6 23:13:03 2017
@@ -17,12 +17,12 @@ define <4 x float> @test_x86_vcvtph2ps_1
;
; X32-AVX512VL-LABEL: test_x86_vcvtph2ps_128:
; X32-AVX512VL: # BB#0:
-; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0xc0]
+; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_128:
; X64-AVX512VL: # BB#0:
-; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0xc0]
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -43,12 +43,12 @@ define <8 x float> @test_x86_vcvtph2ps_2
;
; X32-AVX512VL-LABEL: test_x86_vcvtph2ps_256:
; X32-AVX512VL: # BB#0:
-; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
+; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_256:
; X64-AVX512VL: # BB#0:
-; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
@@ -70,12 +70,12 @@ define <8 x float> @test_x86_vcvtph2ps_2
; X32-AVX512VL-LABEL: test_x86_vcvtph2ps_256_m:
; X32-AVX512VL: # BB#0:
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0x00]
+; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0x00]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_256_m:
; X64-AVX512VL: # BB#0:
-; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0x07]
+; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0x07]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%load = load <8 x i16>, <8 x i16>* %a, align 16
%res = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %load)
@@ -151,12 +151,16 @@ define <4 x float> @test_x86_vcvtps2ph_1
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar:
; X32-AVX512VL: # BB#0:
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x00]
+; X32-AVX512VL-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
+; X32-AVX512VL-NEXT: # xmm0 = mem[0],zero
+; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar:
; X64-AVX512VL: # BB#0:
-; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x07]
+; X64-AVX512VL-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
+; X64-AVX512VL-NEXT: # xmm0 = mem[0],zero
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%load = load i64, i64* %ptr
%ins1 = insertelement <2 x i64> undef, i64 %load, i32 0
@@ -181,12 +185,16 @@ define <4 x float> @test_x86_vcvtps2ph_1
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar2:
; X32-AVX512VL: # BB#0:
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x00]
+; X32-AVX512VL-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
+; X32-AVX512VL-NEXT: # xmm0 = mem[0],zero
+; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar2:
; X64-AVX512VL: # BB#0:
-; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x07]
+; X64-AVX512VL-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
+; X64-AVX512VL-NEXT: # xmm0 = mem[0],zero
+; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%load = load i64, i64* %ptr
%ins = insertelement <2 x i64> undef, i64 %load, i32 0
More information about the llvm-commits
mailing list