[PATCH] D55274: [DagCombiner][X86] Simplify a ConcatVectors of a scalar_to_vector with undef.
Andrea Di Biagio via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 4 08:12:59 PST 2018
andreadb created this revision.
andreadb added reviewers: RKSimon, craig.topper, spatel.
This patch introduces a new DAGCombiner rule to simplify concat_vectors nodes:
concat_vectors( bitcast (scalar_to_vector %A), UNDEF) --> bitcast (scalar_to_vector %A)
This patch only partially addresses PR39257. In particular, it is enough to fix one of the two problematic cases mentioned in PR39257. However, it is not enough to fix the original test case from Craig in PR39257; that particular case would probably require a more complicated approach (and knowledge about used bits).
Before this patch, we used to generate the following code for function PR39257 (-mtriple=x86_64, -mattr=+avx):
vmovsd (%rdi), %xmm0 # xmm0 = mem[0],zero
vxorps %xmm1, %xmm1, %xmm1
vblendps $3, %xmm0, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[2,3]
vmovaps %ymm0, (%rsi)
vzeroupper
retq
Now we generate this:
vmovsd (%rdi), %xmm0 # xmm0 = mem[0],zero
vmovaps %ymm0, (%rsi)
vzeroupper
retq
As a side note: that VZEROUPPER is completely redundant...
I guess the vzeroupper insertion pass doesn't realize that the definition of %xmm0 from vmovsd already zeroes the upper half of %ymm0. Note that with `-mcpu=btver2` we don't get that vzeroupper, because the vzeroupper insertion pass is disabled.
https://reviews.llvm.org/D55274
Files:
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
test/CodeGen/X86/simplify_concat_vectors.ll
Index: test/CodeGen/X86/simplify_concat_vectors.ll
===================================================================
--- test/CodeGen/X86/simplify_concat_vectors.ll
+++ test/CodeGen/X86/simplify_concat_vectors.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
+
+define void @PR32957(<2 x float>* %in, <8 x float>* %out) {
+; CHECK-LABEL: PR32957:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovaps %ymm0, (%rsi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %ld = load <2 x float>, <2 x float>* %in, align 8
+ %ext = extractelement <2 x float> %ld, i64 0
+ %ext2 = extractelement <2 x float> %ld, i64 1
+ %ins = insertelement <8 x float> <float undef, float undef, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, float %ext, i64 0
+ %ins2 = insertelement <8 x float> %ins, float %ext2, i64 1
+ store <8 x float> %ins2, <8 x float>* %out, align 32
+ ret void
+}
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16310,18 +16310,42 @@
return SDValue();
}
-static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG,
+ unsigned LegalOperations) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT OpVT = N->getOperand(0).getValueType();
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // concat_vectors( bitcast (scalar_to_vector %A), UNDEF) -->
+ // bitcast (scalar_to_vector %A)
+ if (!LegalOperations && N->getNumOperands() > 1) {
+ SDValue Op0 = N->getOperand(0);
+ if (Op0.hasOneUse() && Op0.getOpcode() == ISD::BITCAST &&
+ Op0.getOperand(0).hasOneUse() &&
+ Op0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ bool AllUndefs =
+ std::all_of(N->op_begin() + 1, N->op_end(),
+ [](const SDValue &U) { return U.isUndef(); });
+
+ if (AllUndefs) {
+ SDValue Scalar = Op0.getOperand(0).getOperand(0);
+ EVT SVT = Scalar.getValueType();
+
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getSizeInBits() / SVT.getSizeInBits());
+ SDValue STV = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, NewVT, Scalar);
+ return DAG.getBitcast(VT, STV);
+ }
+ }
+ }
+
// If the operands are legal vectors, leave them alone.
if (TLI.isTypeLegal(OpVT))
return SDValue();
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
SmallVector<SDValue, 8> Ops;
-
EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
@@ -16551,7 +16575,7 @@
}
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
- if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG, LegalOperations))
return V;
// Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D55274.176636.patch
Type: text/x-patch
Size: 3312 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181204/8654cafd/attachment.bin>
More information about the llvm-commits
mailing list