[PATCH] Teach the DAGCombiner how to fold insert_subvector nodes with concat_vectors input

Thu Jan 30 11:21:58 PST 2014

Hi Manman,

Thanks for the review - I've attached a new rebased patch with the
period added to the end of the comment.  I don't have commit access,
so could you commit it for me?

Thanks,
Rob.

On 30 January 2014 18:46, Manman Ren <manman.ren at gmail.com> wrote:
>
> LGTM except one nit:
> +
> +  // If the input vector is a concatenation, and the insert replaces
> +  // one of the halves, we can optimize into a single concat_vectors
>
> The comment should end with a period :)
>
> Thanks,
> Manman
>
>
> On Thu, Jan 30, 2014 at 8:39 AM, Robert Lougher <rob.lougher at gmail.com>
> wrote:
>>
>> ping.
>>
>> On 23 January 2014 19:57, Robert Lougher <rob.lougher at gmail.com> wrote:
>> > Hi,
>> >
>> > This patch teaches the DAGCombiner how to fold insert_subvector nodes
>> > when the input is a concat_vectors and the insert replaces one of the
>> > concat halves:
>> >
>> > Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
>> > (concat_vectors Z, Y)
>> > Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
>> > (concat_vectors X, Z)
>> >
>> > This can be seen with the following IR:
>> >
>> > define <8 x float> @lower_half(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) {
>> >   %1 = shufflevector <4 x float> %v1, <4 x float> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
>> >   %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %1, <4 x float> %v3, i8 0)
>> >   ret <8 x float> %2
>> > }
>> >
>> > The vinsertf128 intrinsic is converted into an insert_subvector node
>> > in SelectionDAGBuilder.cpp.
>> >
>> > Using AVX, without the patch this generates two vinsertf128
>> > instructions:
>> >
>> > vinsertf128 $1, %xmm1, %ymm0, %ymm0
>> > vinsertf128 $0, %xmm2, %ymm0, %ymm0
>> >
>> > With the patch this is optimized into:
>> >
>> > vinsertf128 $1, %xmm1, %ymm2, %ymm0
>> >
>> >
>> > I have added a test that checks both the upper and lower halves.  If
>> > the patch looks OK please submit for me.
>> >
>> > Thanks,
>> > Rob.
>> >
>> > --
>> > Robert Lougher
>> > SN Systems - Sony Computer Entertainment Group
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>
-------------- next part --------------
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================

--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp	(revision 200481)
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp	(working copy)
@@ -274,6 +274,7 @@
     SDValue visitCONCAT_VECTORS(SDNode *N);
     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
     SDValue visitVECTOR_SHUFFLE(SDNode *N);
+    SDValue visitINSERT_SUBVECTOR(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
     SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -1230,6 +1231,7 @@
   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
+  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
   }
   return SDValue();
 }
@@ -10431,6 +10433,42 @@
   return SDValue();
 }
 
+/// visitINSERT_SUBVECTOR - Try to fold an INSERT_SUBVECTOR node whose input
+/// vector is a two-operand CONCAT_VECTORS and whose inserted subvector exactly
+/// replaces one of the concatenated halves. Returns the replacement
+/// CONCAT_VECTORS node, or an empty SDValue if no fold applies.
+SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+
+  // If the input vector is a concatenation, and the insert replaces
+  // one of the halves, we can optimize into a single concat_vectors.
+  // The inserted subvector must have the same type as the concat operands;
+  // INSERT_SUBVECTOR also allows narrower subvectors, which would only
+  // overwrite part of a half and cannot be expressed as a concat operand.
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+      N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant &&
+      N0.getOperand(0).getValueType() == N1.getValueType()) {
+    APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
+    EVT VT = N->getValueType(0);
+
+    // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+    // (concat_vectors Z, Y)
+    if (InsIdx == 0)
+      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+                         N1, N0.getOperand(1));
+
+    // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+    // (concat_vectors X, Z)
+    if (InsIdx == VT.getVectorNumElements()/2)
+      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+                         N0.getOperand(0), N1);
+  }
+
+  return SDValue();
+}
+
 /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
 /// an AND to a vector_shuffle with the destination vector and a zero vector.
 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
Index: test/CodeGen/X86/vec_shuf-insert.ll
===================================================================
--- test/CodeGen/X86/vec_shuf-insert.ll	(revision 0)
+++ test/CodeGen/X86/vec_shuf-insert.ll	(revision 0)
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx | FileCheck %s
+
+; These tests check that an insert_subvector which replaces one of the halves
+; of a concat_vectors is optimized into a single vinsertf128.
+
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8)
+
+define <8 x float> @lower_half(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) {
+  %1 = shufflevector <4 x float> %v1, <4 x float> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask: %1 is %v1 followed by %v2
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %1, <4 x float> %v3, i8 0) ; index 0: %v3 replaces the lower half (%v1)
+  ret <8 x float> %2
+; Expect exactly one vinsertf128 before the return; the NOT directive below rejects an extra one ahead of it.
+; CHECK-LABEL: lower_half
+; CHECK-NOT: vinsertf128
+; CHECK: vinsertf128 $1, %xmm1, %ymm2, %ymm0
+; CHECK-NEXT: ret
+}
+
+define <8 x float> @upper_half(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) {
+  %1 = shufflevector <4 x float> %v1, <4 x float> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask: %1 is %v1 followed by %v2
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %1, <4 x float> %v3, i8 1) ; index 1: %v3 replaces the upper half (%v2)
+  ret <8 x float> %2
+; Expect exactly one vinsertf128 before the return; the NOT directive below rejects an extra one ahead of it.
+; CHECK-LABEL: upper_half
+; CHECK-NOT: vinsertf128
+; CHECK: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-NEXT: ret
+}