[PATCH] D55274: [DagCombiner][X86] Simplify a ConcatVectors of a scalar_to_vector with undef.

Tue Dec 4 08:12:59 PST 2018

andreadb created this revision.
andreadb added reviewers: RKSimon, craig.topper, spatel.

This patch introduces a new DAGCombiner rule to simplify concat_vectors nodes:

  concat_vectors( bitcast (scalar_to_vector %A), UNDEF) --> bitcast (scalar_to_vector %A)

This patch only partially addresses PR39257. In particular, it is enough to fix one of the two problematic cases mentioned in PR39257. However, it is not enough to fix the original test case from Craig in PR39257; that particular case would probably require a more complicated approach (and knowledge about used bits).

Before this patch, we used to generate the following code for function PR39257 (-mtriple=x86_64, -mattr=+avx):

  vmovsd  (%rdi), %xmm0           # xmm0 = mem[0],zero
  vxorps  %xmm1, %xmm1, %xmm1
  vblendps        $3, %xmm0, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[2,3]
  vmovaps %ymm0, (%rsi)
  vzeroupper
  retq

Now we generate this:

  vmovsd  (%rdi), %xmm0           # xmm0 = mem[0],zero
  vmovaps %ymm0, (%rsi)
  vzeroupper
  retq

As a side note: that VZEROUPPER is completely redundant...

I guess the vzeroupper insertion pass doesn't realize that the definition of %xmm0 from vmovsd is already zeroing the upper half of %ymm0. Note that on `-mcpu=btver2`, we don't get that vzeroupper because the vzeroupper insertion pass is disabled.


https://reviews.llvm.org/D55274

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  test/CodeGen/X86/simplify_concat_vectors.ll


Index: test/CodeGen/X86/simplify_concat_vectors.ll
===================================================================

--- test/CodeGen/X86/simplify_concat_vectors.ll
+++ test/CodeGen/X86/simplify_concat_vectors.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
+
+define void @PR32957(<2 x float>* %in, <8 x float>* %out) {
+; CHECK-LABEL: PR32957:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    vmovaps %ymm0, (%rsi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %ld = load <2 x float>, <2 x float>* %in, align 8
+  %ext = extractelement <2 x float> %ld, i64 0
+  %ext2 = extractelement <2 x float> %ld, i64 1
+  %ins = insertelement <8 x float> <float undef, float undef, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, float %ext, i64 0
+  %ins2 = insertelement <8 x float> %ins, float %ext2, i64 1
+  store <8 x float> %ins2, <8 x float>* %out, align 32
+  ret void
+}
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16310,18 +16310,42 @@
   return SDValue();
 }
 
-static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG,
+                                            unsigned LegalOperations) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT OpVT = N->getOperand(0).getValueType();
 
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  // concat_vectors( bitcast (scalar_to_vector %A), UNDEF) -->
+  //     bitcast (scalar_to_vector %A)
+  if (!LegalOperations && N->getNumOperands() > 1) {
+    SDValue Op0 = N->getOperand(0);
+    if (Op0.hasOneUse() && Op0.getOpcode() == ISD::BITCAST &&
+        Op0.getOperand(0).hasOneUse() &&
+        Op0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
+      bool AllUndefs =
+          std::all_of(N->op_begin() + 1, N->op_end(),
+                      [](const SDValue &U) { return U.isUndef(); });
+
+      if (AllUndefs) {
+        SDValue Scalar = Op0.getOperand(0).getOperand(0);
+        EVT SVT = Scalar.getValueType();
+
+        EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+                                     VT.getSizeInBits() / SVT.getSizeInBits());
+        SDValue STV = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, NewVT, Scalar);
+        return DAG.getBitcast(VT, STV);
+      }
+    }
+  }
+
   // If the operands are legal vectors, leave them alone.
   if (TLI.isTypeLegal(OpVT))
     return SDValue();
 
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
   SmallVector<SDValue, 8> Ops;
-
   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
 
@@ -16551,7 +16575,7 @@
   }
 
   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
-  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+  if (SDValue V = combineConcatVectorOfScalars(N, DAG, LegalOperations))
     return V;
 
   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D55274.176636.patch
Type: text/x-patch
Size: 3312 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181204/8654cafd/attachment.bin>