[llvm] r354757 - [X86] Combine zext(packus(x), packus(y)) -> concat(x, y) (PR39637)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 24 11:57:52 PST 2019
Author: rksimon
Date: Sun Feb 24 11:57:52 2019
New Revision: 354757
URL: http://llvm.org/viewvc/llvm-project?rev=354757&view=rev
Log:
[X86] Combine zext(packus(x),packus(y)) -> concat(x,y) (PR39637)
Its proving tricky to combine shuffles across multiple vector sizes, so for now I'm adding this more specific combine - the pattern is common enough to be worth it as a first step.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/combine-udiv.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=354757&r1=354756&r2=354757&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Feb 24 11:57:52 2019
@@ -40328,6 +40328,20 @@ static SDValue combineZext(SDNode *N, Se
if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
return R;
+ // TODO: Combine with any target/faux shuffle.
+ if (N0.getOpcode() == X86ISD::PACKUS && N0.getValueSizeInBits() == 128 &&
+ VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ unsigned NumSrcElts = N00.getValueType().getVectorNumElements();
+ unsigned NumSrcEltBits = N00.getScalarValueSizeInBits();
+ APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2);
+ if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) &&
+ (N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) {
+ return concatSubVectors(N00, N01, VT, NumSrcElts * 2, DAG, dl, 128);
+ }
+ }
+
return SDValue();
}
Modified: llvm/trunk/test/CodeGen/X86/combine-udiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-udiv.ll?rev=354757&r1=354756&r2=354757&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-udiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-udiv.ll Sun Feb 24 11:57:52 2019
@@ -702,9 +702,6 @@ define <16 x i8> @combine_vec_udiv_nonun
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT: vpackuswb %xmm0, %xmm1, %xmm1
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll?rev=354757&r1=354756&r2=354757&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll Sun Feb 24 11:57:52 2019
@@ -387,9 +387,6 @@ define <16 x i8> @test_divconstant_16i8(
; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
@@ -910,9 +907,6 @@ define <16 x i8> @test_remconstant_16i8(
; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
@@ -927,9 +921,6 @@ define <16 x i8> @test_remconstant_16i8(
; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll?rev=354757&r1=354756&r2=354757&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll Sun Feb 24 11:57:52 2019
@@ -1840,27 +1840,21 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm2
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm2
+; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm1
+; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll?rev=354757&r1=354756&r2=354757&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll Sun Feb 24 11:57:52 2019
@@ -1795,27 +1795,21 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm2
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm2
+; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm1
+; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
More information about the llvm-commits
mailing list