[llvm] r286434 - [AVX-512][X86] Convert avx_cvtt_ps2dq_256 and sse2_cvttps2dq intrinsics to ISD::FP_TO_SINT in the intrinsics table and delete patterns. While nearby also move CVTDQ2PS patterns into their instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 9 23:24:53 PST 2016
Author: ctopper
Date: Thu Nov 10 01:24:52 2016
New Revision: 286434
URL: http://llvm.org/viewvc/llvm-project?rev=286434&view=rev
Log:
[AVX-512][X86] Convert avx_cvtt_ps2dq_256 and sse2_cvttps2dq intrinsics to ISD::FP_TO_SINT in the intrinsics table and delete patterns. While nearby also move CVTDQ2PS patterns into their instructions.
This allows these intrinsics to also use EVEX instructions.
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=286434&r1=286433&r2=286434&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Nov 10 01:24:52 2016
@@ -1441,15 +1441,18 @@ multiclass sse12_cvt_s<bits<8> opc, Regi
itins.rm>, Sched<[itins.Sched.Folded]>;
}
-multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- X86MemOperand x86memop, string asm, Domain d,
- OpndItins itins> {
+multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
+ ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
+ string asm, Domain d, OpndItins itins> {
let hasSideEffects = 0 in {
- def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [], itins.rr, d>, Sched<[itins.Sched]>;
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
+ [(set RC:$dst, (DstTy (sint_to_fp (SrcTy RC:$src))))],
+ itins.rr, d>, Sched<[itins.Sched]>;
let mayLoad = 1 in
- def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
+ [(set RC:$dst, (DstTy (sint_to_fp
+ (SrcTy (bitconvert (ld_frag addr:$src))))))],
+ itins.rm, d>, Sched<[itins.Sched.Folded]>;
}
}
@@ -1722,16 +1725,16 @@ defm CVTSS2SI64 : sse12_cvt_sint<0x2D, V
ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, REX_W;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- PS, VEX, Requires<[HasAVX]>;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
+ PS, VEX, Requires<[HasAVX, NoVLX]>;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, loadv4i64,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- PS, VEX, VEX_L, Requires<[HasAVX]>;
+ PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
PS, Requires<[UseSSE2]>;
@@ -1999,72 +2002,41 @@ def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
+let Predicates = [HasAVX, NoVLX] in {
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))],
+ (v4i32 (fp_to_sint (v4f32 VR128:$src))))],
IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (loadv4f32 addr:$src)))],
+ [(set VR128:$dst,
+ (v4i32 (fp_to_sint (loadv4f32 addr:$src))))],
IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
+ (v8i32 (fp_to_sint (v8f32 VR256:$src))))],
IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
- (loadv8f32 addr:$src)))],
+ [(set VR256:$dst,
+ (v8i32 (fp_to_sint (loadv8f32 addr:$src))))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
Sched<[WriteCvtF2ILd]>;
+}
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
+ [(set VR128:$dst,
+ (v4i32 (fp_to_sint (v4f32 VR128:$src))))],
IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
+ (v4i32 (fp_to_sint (memopv4f32 addr:$src))))],
IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
-let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
- (VCVTDQ2PSrm addr:$src)>;
-
- def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (VCVTTPS2DQrr VR128:$src)>;
- def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
- (VCVTTPS2DQrm addr:$src)>;
-
- def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
- (VCVTDQ2PSYrr VR256:$src)>;
- def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))),
- (VCVTDQ2PSYrm addr:$src)>;
-
- def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
- (VCVTTPS2DQYrr VR256:$src)>;
- def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
- (VCVTTPS2DQYrm addr:$src)>;
-}
-
-let Predicates = [UseSSE2] in {
- def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (CVTDQ2PSrr VR128:$src)>;
- def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (CVTDQ2PSrm addr:$src)>;
-
- def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (CVTTPS2DQrr VR128:$src)>;
- def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
- (CVTTPS2DQrm addr:$src)>;
-}
-
let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=286434&r1=286433&r2=286434&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Thu Nov 10 01:24:52 2016
@@ -255,6 +255,7 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
@@ -1638,6 +1639,7 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTPD2DQ, 0),
+ X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=286434&r1=286433&r2=286434&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Thu Nov 10 01:24:52 2016
@@ -354,10 +354,15 @@ declare <4 x i32> @llvm.x86.sse2.cvttpd2
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
-; CHECK-LABEL: test_x86_sse2_cvttps2dq:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
-; CHECK-NEXT: retl ## encoding: [0xc3]
+; AVX-LABEL: test_x86_sse2_cvttps2dq:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
+; AVX-NEXT: retl ## encoding: [0xc3]
+;
+; AVX512VL-LABEL: test_x86_sse2_cvttps2dq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0]
+; AVX512VL-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -2899,10 +2904,15 @@ declare <4 x i32> @llvm.x86.avx.cvtt.pd2
define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
-; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0xc5,0xfe,0x5b,0xc0]
-; CHECK-NEXT: retl ## encoding: [0xc3]
+; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0xc5,0xfe,0x5b,0xc0]
+; AVX-NEXT: retl ## encoding: [0xc3]
+;
+; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x5b,0xc0]
+; AVX512VL-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=286434&r1=286433&r2=286434&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Thu Nov 10 01:24:52 2016
@@ -543,10 +543,15 @@ define <4 x i32> @test_x86_sse2_cvttps2d
; SSE-NEXT: cvttps2dq %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x5b,0xc0]
; SSE-NEXT: retl ## encoding: [0xc3]
;
-; VCHECK-LABEL: test_x86_sse2_cvttps2dq:
-; VCHECK: ## BB#0:
-; VCHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
-; VCHECK-NEXT: retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse2_cvttps2dq:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
+; AVX2-NEXT: retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse2_cvttps2dq:
+; SKX: ## BB#0:
+; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0]
+; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
More information about the llvm-commits mailing list