[llvm] r312462 - [X86] Add a combine to recognize when we have two insert subvectors that together write the whole vector, but the starting vector isn't undef.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 3 18:13:36 PDT 2017
Author: ctopper
Date: Sun Sep 3 18:13:36 2017
New Revision: 312462
URL: http://llvm.org/viewvc/llvm-project?rev=312462&view=rev
Log:
[X86] Add a combine to recognize when we have two insert subvectors that together write the whole vector, but the starting vector isn't undef.
In this case we should replace the starting vector with undef.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
llvm/trunk/test/CodeGen/X86/insertelement-zero.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=312462&r1=312461&r2=312462&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 3 18:13:36 2017
@@ -35750,6 +35750,18 @@ static SDValue combineInsertSubvector(SD
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
getZeroVector(OpVT, Subtarget, DAG, dl), SubVec2,
Vec.getOperand(2));
+
+ // If we are inserting into both halves of the vector, the starting
+ // vector should be undef. If it isn't, make it so. Only do this if the
+ // the early insert has no other uses.
+ // TODO: Should this be a generic DAG combine?
+ if (!Vec.getOperand(0).isUndef() && Vec.hasOneUse()) {
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
+ SubVec2, Vec.getOperand(2));
+ DCI.AddToWorklist(Vec.getNode());
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec, Idx);
+
+ }
}
}
Modified: llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll?rev=312462&r1=312461&r2=312462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll Sun Sep 3 18:13:36 2017
@@ -758,7 +758,6 @@ define <16 x i16> @_clearupper16xi16b(<1
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm2
-; AVX-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
Modified: llvm/trunk/test/CodeGen/X86/insertelement-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insertelement-zero.ll?rev=312462&r1=312461&r2=312462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insertelement-zero.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insertelement-zero.ll Sun Sep 3 18:13:36 2017
@@ -473,7 +473,6 @@ define <32 x i8> @insert_v32i8_z12345678
; AVX1-NEXT: xorl %eax, %eax
; AVX1-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
@@ -485,7 +484,6 @@ define <32 x i8> @insert_v32i8_z12345678
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
More information about the llvm-commits
mailing list