[llvm] r286344 - [X86] Lower AVX512 and SSE intrinsics for CVTTPD2DQ to X86ISD::CVTTPD2DQ.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 8 23:31:33 PST 2016
Author: ctopper
Date: Wed Nov 9 01:31:32 2016
New Revision: 286344
URL: http://llvm.org/viewvc/llvm-project?rev=286344&view=rev
Log:
[X86] Lower AVX512 and SSE intrinsics for CVTTPD2DQ to X86ISD::CVTTPD2DQ.
Summary: This allows the SSE intrinsic to use the EVEX instruction when available. It also fixes EVEX to not use a weird (v4i32 (fp_to_sint v2f64)) node and it merges some isel patterns. This also fixes some cases that weren't combining vzmovl with cvttpd2dq to remove extra moves.
Reviewers: delena, zvi, RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D26330
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=286344&r1=286343&r2=286344&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Nov 9 01:31:32 2016
@@ -6077,6 +6077,10 @@ def : Pat<(v8f64 (extloadv8f32 addr:$src
(VCVTPS2PDZrm addr:$src)>;
let Predicates = [HasVLX] in {
+ let AddedComplexity = 15 in
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
+ (VCVTPD2PSZ128rr VR128X:$src)>;
def : Pat<(v2f64 (extloadv2f32 addr:$src)),
(VCVTPS2PDZ128rm addr:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),
@@ -6148,8 +6152,8 @@ multiclass avx512_cvtps2dq<bits<8> opc,
}
// Convert Double to Signed/Unsigned Doubleword with truncation
-multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr,
- SDNode OpNode, SDNode OpNodeRnd> {
+multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
@@ -6157,11 +6161,11 @@ multiclass avx512_cvttpd2dq<bits<8> opc,
}
let Predicates = [HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
- // memory forms of these instructions in Asm Parcer. They have the same
+ // memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
- "{1to2}", "{x}">, EVEX_V128;
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
+ OpNode128, "{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;
}
@@ -6302,7 +6306,7 @@ defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B,
X86cvttp2siRnd>,
XS, EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint,
+defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttpd2dq,
X86cvttp2siRnd>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -6310,7 +6314,7 @@ defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78
X86cvttp2uiRnd>, PS,
EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
+defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, fp_to_uint,
X86cvttp2uiRnd>, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
@@ -6408,13 +6412,10 @@ def : Pat<(v4f64 (uint_to_fp (v4i32 VR12
}
let Predicates = [HasAVX512, HasVLX] in {
- def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))))),
+ let AddedComplexity = 15 in
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))),
(VCVTTPD2DQZ128rr VR128:$src)>;
- def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src))),
- (VCVTTPD2DQZ128rr VR128X:$src)>;
- def : Pat<(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))),
- (VCVTTPD2DQZ128rm addr:$src)>;
}
let Predicates = [HasAVX512] in {
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=286344&r1=286343&r2=286344&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Nov 9 01:31:32 2016
@@ -2065,11 +2065,12 @@ let Predicates = [UseSSE2] in {
(CVTTPS2DQrm addr:$src)>;
}
+let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
+ (v4i32 (X86cvttpd2dq (v2f64 VR128:$src))))],
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
@@ -2078,10 +2079,11 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg,
// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
+let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (loadv2f64 addr:$src)))],
+ [(set VR128:$dst,
+ (v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))))],
IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
// YMM only
@@ -2099,13 +2101,10 @@ def : InstAlias<"vcvttpd2dq\t{$src, $dst
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))))),
- (VCVTTPD2DQrr VR128:$src)>;
- def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))),
+ let AddedComplexity = 15 in
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))),
(VCVTTPD2DQrr VR128:$src)>;
- def : Pat<(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))),
- (VCVTTPD2DQXrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>;
@@ -2125,8 +2124,9 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (
Sched<[WriteCvtF2ILd]>;
let Predicates = [UseSSE2] in {
- def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))))),
+ let AddedComplexity = 15 in
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))),
(CVTTPD2DQrr VR128:$src)>;
def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))),
(CVTTPD2DQrr VR128:$src)>;
@@ -2254,8 +2254,9 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (o
let Predicates = [HasAVX, NoVLX] in {
// Match fpround and fpextend for 128/256-bit conversions
- def : Pat<(v4f32 (bitconvert (X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (v2f64 VR128:$src)))))))),
+ let AddedComplexity = 15 in
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
(VCVTPD2PSrr VR128:$src)>;
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
(VCVTPD2PSrr VR128:$src)>;
@@ -2272,8 +2273,9 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [UseSSE2] in {
// Match fpround and fpextend for 128 conversions
- def : Pat<(v4f32 (bitconvert (X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (v2f64 VR128:$src)))))))),
+ let AddedComplexity = 15 in
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
(CVTPD2PSrr VR128:$src)>;
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
(CVTPD2PSrr VR128:$src)>;
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=286344&r1=286343&r2=286344&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Nov 9 01:31:32 2016
@@ -574,7 +574,7 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::VFPEXTS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, 0),
+ X86ISD::CVTTPD2DQ, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
@@ -1636,6 +1636,7 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
+ X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTPD2DQ, 0),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=286344&r1=286343&r2=286344&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Wed Nov 9 01:31:32 2016
@@ -338,10 +338,15 @@ declare <2 x double> @llvm.x86.sse2.cvts
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
-; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
-; CHECK-NEXT: retl ## encoding: [0xc3]
+; AVX-LABEL: test_x86_sse2_cvttpd2dq:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
+; AVX-NEXT: retl ## encoding: [0xc3]
+;
+; AVX512VL-LABEL: test_x86_sse2_cvttpd2dq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0]
+; AVX512VL-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=286344&r1=286343&r2=286344&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Wed Nov 9 01:31:32 2016
@@ -324,8 +324,6 @@ define <4 x float> @test_x86_sse2_cvtpd2
; SKX-LABEL: test_x86_sse2_cvtpd2ps_zext:
; SKX: ## BB#0:
; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0]
-; SKX-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc0]
-; SKX-NEXT: ## xmm0 = xmm0[0],zero
; SKX-NEXT: retl ## encoding: [0xc3]
%cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
%res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -502,10 +500,15 @@ define <4 x i32> @test_x86_sse2_cvttpd2d
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]
; SSE-NEXT: retl ## encoding: [0xc3]
;
-; VCHECK-LABEL: test_x86_sse2_cvttpd2dq:
-; VCHECK: ## BB#0:
-; VCHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
-; VCHECK-NEXT: retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse2_cvttpd2dq:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
+; AVX2-NEXT: retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse2_cvttpd2dq:
+; SKX: ## BB#0:
+; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0]
+; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -516,22 +519,16 @@ define <2 x i64> @test_mm_cvttpd_epi32_z
; SSE-LABEL: test_mm_cvttpd_epi32_zext:
; SSE: ## BB#0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]
-; SSE-NEXT: movq %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x7e,0xc0]
-; SSE-NEXT: ## xmm0 = xmm0[0],zero
; SSE-NEXT: retl ## encoding: [0xc3]
;
; AVX2-LABEL: test_mm_cvttpd_epi32_zext:
; AVX2: ## BB#0:
; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
-; AVX2-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x7e,0xc0]
-; AVX2-NEXT: ## xmm0 = xmm0[0],zero
; AVX2-NEXT: retl ## encoding: [0xc3]
;
; SKX-LABEL: test_mm_cvttpd_epi32_zext:
; SKX: ## BB#0:
-; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
-; SKX-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc0]
-; SKX-NEXT: ## xmm0 = xmm0[0],zero
+; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
%res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
More information about the llvm-commits
mailing list