[llvm] r286979 - [X86][SSE] Improve SINT_TO_FP of boolean vector results (signum)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 08:24:41 PST 2016
Author: rksimon
Date: Tue Nov 15 10:24:40 2016
New Revision: 286979
URL: http://llvm.org/viewvc/llvm-project?rev=286979&view=rev
Log:
[X86][SSE] Improve SINT_TO_FP of boolean vector results (signum)
This patch helps avoid poor legalization of boolean vector results (e.g. 8f32 -> 8i1 -> 8i16) that feed into SINT_TO_FP by inserting an early SIGN_EXTEND and so help improve the truncation logic.
This is not necessary for AVX512 targets where boolean vectors are legal - AVX512 manages to lower (sint_to_fp vXi1) into some form of (select mask, 1.0f, 0.0f) in most cases.
Fix for PR13248
Differential Revision: https://reviews.llvm.org/D26583
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
llvm/trunk/test/CodeGen/X86/sse-fsignum.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=286979&r1=286978&r2=286979&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 15 10:24:40 2016
@@ -31752,9 +31752,12 @@ static SDValue combineSIntToFP(SDNode *N
EVT InVT = Op0.getValueType();
EVT InSVT = InVT.getScalarType();
+ // SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
- if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
+ if (InVT.isVector() &&
+ (InSVT == MVT::i8 || InSVT == MVT::i16 ||
+ (InSVT == MVT::i1 && !DAG.getTargetLoweringInfo().isTypeLegal(InVT)))) {
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements());
Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=286979&r1=286978&r2=286979&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Tue Nov 15 10:24:40 2016
@@ -836,8 +836,6 @@ define <4 x double> @sitofp_4i1_double(<
; KNL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; KNL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpmovqd %zmm0, %ymm0
-; KNL-NEXT: vpslld $31, %xmm0, %xmm0
-; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
; KNL-NEXT: vcvtdq2pd %xmm0, %ymm0
; KNL-NEXT: retq
;
@@ -860,21 +858,8 @@ define <2 x float> @sitofp_2i1_float(<2
; KNL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; KNL-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL-NEXT: vpsrad $31, %xmm0, %xmm1
-; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; KNL-NEXT: vpextrq $1, %xmm0, %rax
-; KNL-NEXT: xorl %ecx, %ecx
-; KNL-NEXT: testb $1, %al
-; KNL-NEXT: movl $-1, %eax
-; KNL-NEXT: movl $0, %edx
-; KNL-NEXT: cmovnel %eax, %edx
-; KNL-NEXT: vcvtsi2ssl %edx, %xmm2, %xmm1
-; KNL-NEXT: vmovq %xmm0, %rdx
-; KNL-NEXT: testb $1, %dl
-; KNL-NEXT: cmovnel %eax, %ecx
-; KNL-NEXT: vcvtsi2ssl %ecx, %xmm2, %xmm0
-; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; KNL-NEXT: vcvtdq2ps %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sitofp_2i1_float:
Modified: llvm/trunk/test/CodeGen/X86/sse-fsignum.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-fsignum.ll?rev=286979&r1=286978&r2=286979&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-fsignum.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-fsignum.ll Tue Nov 15 10:24:40 2016
@@ -33,59 +33,19 @@ entry:
}
define void @signum64a(<2 x double>*) {
-; AVX1-LABEL: signum64a:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovapd (%rdi), %xmm0
-; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpextrq $1, %xmm2, %rax
-; AVX1-NEXT: vmovq %xmm2, %rcx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vcvtdq2pd %xmm2, %xmm2
-; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: vmovq %xmm0, %rcx
-; AVX1-NEXT: vmovd %ecx, %xmm0
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX1-NEXT: vsubpd %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vmovapd %xmm0, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: signum64a:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vmovapd (%rdi), %xmm0
-; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpextrq $1, %xmm2, %rax
-; AVX2-NEXT: vmovq %xmm2, %rcx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vcvtdq2pd %xmm2, %xmm2
-; AVX2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: vmovq %xmm0, %rcx
-; AVX2-NEXT: vmovd %ecx, %xmm0
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX2-NEXT: vsubpd %xmm0, %xmm2, %xmm0
-; AVX2-NEXT: vmovapd %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: signum64a:
-; AVX512F: # BB#0: # %entry
-; AVX512F-NEXT: vmovapd (%rdi), %xmm0
-; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; AVX512F-NEXT: vcvtdq2pd %xmm2, %xmm2
-; AVX512F-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX512F-NEXT: vsubpd %xmm0, %xmm2, %xmm0
-; AVX512F-NEXT: vmovapd %xmm0, (%rdi)
-; AVX512F-NEXT: retq
+; AVX-LABEL: signum64a:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovapd (%rdi), %xmm0
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; AVX-NEXT: vcvtdq2pd %xmm2, %xmm2
+; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX-NEXT: vsubpd %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vmovapd %xmm0, (%rdi)
+; AVX-NEXT: retq
entry:
%1 = load <2 x double>, <2 x double>* %0
%2 = fcmp olt <2 x double> %1, zeroinitializer
@@ -107,24 +67,8 @@ define void @signum32b(<8 x float>*) {
; AVX1-NEXT: vmovaps (%rdi), %ymm0
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
-; AVX1-NEXT: vpsraw $15, %xmm2, %xmm2
-; AVX1-NEXT: vpmovsxwd %xmm2, %xmm3
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2
; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
-; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vsubps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vmovaps %ymm0, (%rdi)
@@ -136,18 +80,8 @@ define void @signum32b(<8 x float>*) {
; AVX2-NEXT: vmovaps (%rdi), %ymm0
; AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
-; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsllw $15, %xmm2, %xmm2
-; AVX2-NEXT: vpsraw $15, %xmm2, %xmm2
-; AVX2-NEXT: vpmovsxwd %xmm2, %ymm2
; AVX2-NEXT: vcvtdq2ps %ymm2, %ymm2
; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: vsubps %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovaps %ymm0, (%rdi)
@@ -189,14 +123,10 @@ define void @signum64b(<4 x double>*) {
; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vcvtdq2pd %xmm2, %ymm2
; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: vsubpd %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vmovapd %ymm0, (%rdi)
@@ -210,14 +140,10 @@ define void @signum64b(<4 x double>*) {
; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpslld $31, %xmm2, %xmm2
-; AVX2-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT: vcvtdq2pd %xmm2, %ymm2
; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: vsubpd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovapd %ymm0, (%rdi)
@@ -230,13 +156,9 @@ define void @signum64b(<4 x double>*) {
; AVX512F-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vpmovqd %zmm2, %ymm2
-; AVX512F-NEXT: vpslld $31, %xmm2, %xmm2
-; AVX512F-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX512F-NEXT: vcvtdq2pd %xmm2, %ymm2
; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX512F-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512F-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512F-NEXT: vsubpd %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vmovapd %ymm0, (%rdi)
More information about the llvm-commits
mailing list