[llvm] r347299 - [X86][SSE] Add SimplifyDemandedVectorElts support for PACKSS/PACKUS instructions.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 20 03:09:47 PST 2018
Author: rksimon
Date: Tue Nov 20 03:09:46 2018
New Revision: 347299
URL: http://llvm.org/viewvc/llvm-project?rev=347299&view=rev
Log:
[X86][SSE] Add SimplifyDemandedVectorElts support for PACKSS/PACKUS instructions.
As discussed on rL347240.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/combine-udiv.ll
llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-sse41.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=347299&r1=347298&r2=347299&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 20 03:09:46 2018
@@ -32232,6 +32232,36 @@ bool X86TargetLowering::SimplifyDemanded
return true;
break;
}
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS: {
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumInnerElts = NumElts / 2;
+ int NumEltsPerLane = NumElts / NumLanes;
+ int NumInnerEltsPerLane = NumInnerElts / NumLanes;
+
+ // Map DemandedElts to the packed operands.
+ APInt DemandedLHS = APInt::getNullValue(NumInnerElts);
+ APInt DemandedRHS = APInt::getNullValue(NumInnerElts);
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) {
+ int OuterIdx = (Lane * NumEltsPerLane) + Elt;
+ int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt;
+ if (DemandedElts[OuterIdx])
+ DemandedLHS.setBit(InnerIdx);
+ if (DemandedElts[OuterIdx + NumInnerEltsPerLane])
+ DemandedRHS.setBit(InnerIdx);
+ }
+ }
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ break;
+ }
case X86ISD::VBROADCAST: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
Modified: llvm/trunk/test/CodeGen/X86/combine-udiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-udiv.ll?rev=347299&r1=347298&r2=347299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-udiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-udiv.ll Tue Nov 20 03:09:46 2018
@@ -677,7 +677,7 @@ define <16 x i8> @combine_vec_udiv_nonun
; SSE41-NEXT: psllw $1, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3,4,5,6,7]
; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: packuswb %xmm3, %xmm2
+; SSE41-NEXT: packuswb %xmm0, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
@@ -697,7 +697,7 @@ define <16 x i8> @combine_vec_udiv_nonun
; AVX1-NEXT: vpsllw $1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
@@ -714,8 +714,7 @@ define <16 x i8> @combine_vec_udiv_nonun
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpackuswb %xmm0, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll?rev=347299&r1=347298&r2=347299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll Tue Nov 20 03:09:46 2018
@@ -350,8 +350,7 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm2
; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm3
; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
@@ -366,8 +365,7 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
-; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpackssdw %xmm0, %xmm2, %xmm2
; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-sse41.ll?rev=347299&r1=347298&r2=347299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-sse41.ll Tue Nov 20 03:09:46 2018
@@ -37,15 +37,15 @@ define <16 x i8> @blend_packuswb(<8 x i1
define <8 x i16> @blend_packusdw_packuswb(<4 x i32> %a0, <4 x i32> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE41-LABEL: blend_packusdw_packuswb:
; SSE41: # %bb.0:
-; SSE41-NEXT: packusdw %xmm1, %xmm0
-; SSE41-NEXT: packuswb %xmm3, %xmm2
+; SSE41-NEXT: packusdw %xmm0, %xmm0
+; SSE41-NEXT: packuswb %xmm0, %xmm2
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_packusdw_packuswb:
; AVX: # %bb.0:
-; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpackuswb %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm2, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
More information about the llvm-commits mailing list