[llvm] r310711 - Improve handling of insert_subvector of bitcast values

Fri Aug 11 06:21:41 PDT 2017

Author: niravd
Date: Fri Aug 11 06:21:41 2017
New Revision: 310711

URL: http://llvm.org/viewvc/llvm-project?rev=310711&view=rev
Log:
Improve handling of insert_subvector of bitcast values

Fix insert_subvector / extract_subvector merges of bitcast values.

Reviewers: efriedma, craig.topper, RKSimon

Subscribers: RKSimon, llvm-commits

Differential Revision: https://reviews.llvm.org/D34571

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=310711&r1=310710&r2=310711&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Aug 11 06:21:41 2017
@@ -15890,12 +15890,47 @@ SDValue DAGCombiner::visitINSERT_SUBVECT
   if (N1.isUndef())
     return N0;
 
+  // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
+  // us to pull BITCASTs from input to output.
+  if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
+    if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
+      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
+
   // If this is an insert of an extracted vector into an undef vector, we can
   // just use the input to the extract.
   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
     return N1.getOperand(0);
 
+  // If we are inserting a bitcast value into an undef, with the same
+  // number of elements, just use the bitcast input of the extract.
+  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
+  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
+  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
+      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+      N1.getOperand(0).getOperand(1) == N2 &&
+      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
+          VT.getVectorNumElements()) {
+    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
+  }
+
+  // If both N1 and N2 are bitcast values on which insert_subvector
+  // would makes sense, pull the bitcast through.
+  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
+  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
+  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
+    SDValue CN0 = N0.getOperand(0);
+    SDValue CN1 = N1.getOperand(0);
+    if (CN0.getValueType().getVectorElementType() ==
+            CN1.getValueType().getVectorElementType() &&
+        CN0.getValueType().getVectorNumElements() ==
+            VT.getVectorNumElements()) {
+      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
+                                      CN0.getValueType(), CN0, CN1, N2);
+      return DAG.getBitcast(VT, NewINSERT);
+    }
+  }
+
   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )

Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=310711&r1=310710&r2=310711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
+++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Fri Aug 11 06:21:41 2017
@@ -492,15 +492,10 @@ define void @merge_vec_element_store(<8
   store float %vecext7, float* %arrayidx7, align 4
   ret void
 
-; CHECK: vextractf128	$1, %ymm0, %xmm1
-; CHECK: vinsertf128	$1, %xmm1, %ymm0, %ymm0
+; CHECK-LABEL: merge_vec_element_store
+; CHECK: vmovups %ymm0, (%rdi)
+; CHECK: vzeroupper
 ; CHECK: retq
-
-; This is what should be generated:
-; FIXME-LABEL: merge_vec_element_store
-; FIXME: vmovups
-; FIXME-NEXT: vzeroupper
-; FIXME-NEXT: retq
 }
 
 ; PR21711 - Merge vector stores into wider vector stores.
@@ -520,18 +515,11 @@ define void @merge_vec_extract_stores(<8
   store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
   ret void
 
-; These vblendpd are obviously redundant.
-; CHECK: vblendpd	$12, %ymm0, %ymm0, %ymm0 # ymm0 = ymm0[0,1,2,3]
-; CHECK: vmovupd	%ymm0, 48(%rdi)
-; CHECK: vblendpd	$12, %ymm1, %ymm1, %ymm0 # ymm0 = ymm1[0,1,2,3]
-; CHECK: vmovupd	%ymm0, 80(%rdi)
-
-; This is what should be generated:
-; FIXME-LABEL: merge_vec_extract_stores
-; FIXME:      vmovups %ymm0, 48(%rdi)
-; FIXME-NEXT: vmovups %ymm1, 80(%rdi)
-; FIXME-NEXT: vzeroupper
-; FIXME-NEXT: retq
+; CHECK-LABEL: merge_vec_extract_stores
+; CHECK:      vmovups %ymm0, 48(%rdi)
+; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
 }
 
 ; Merging vector stores when sourced from vector loads.

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=310711&r1=310710&r2=310711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Fri Aug 11 06:21:41 2017
@@ -807,10 +807,10 @@ define <4 x i64> @shuffle_v4i64_0142(<4
 define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_0412:
 ; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
 ; AVX1-NEXT:    retq
 ;