[llvm] r222536 - [DAG] Teach how to turn a build_vector into a shuffle if some of the operands are zero.
Andrea Di Biagio
Andrea_DiBiagio at sn.scee.net
Fri Nov 21 06:32:07 PST 2014
Author: adibiagio
Date: Fri Nov 21 08:32:06 2014
New Revision: 222536
URL: http://llvm.org/viewvc/llvm-project?rev=222536&view=rev
Log:
[DAG] Teach how to turn a build_vector into a shuffle if some of the operands are zero.
Before this patch, the DAGCombiner only tried to convert build_vector dag nodes
into shuffles if all operands were either extract_vector_elt or undef.
This patch improves that logic and teaches the DAGCombiner how to deal with
build_vector dag nodes where one or more operands are zero. A build_vector
dag node with some zero operands is turned into a shuffle only if the resulting
shuffle mask is legal for the target.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/sse41.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=222536&r1=222535&r2=222536&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Nov 21 08:32:06 2014
@@ -10557,26 +10557,37 @@ SDValue DAGCombiner::visitBUILD_VECTOR(S
return SDValue();
SDValue VecIn1, VecIn2;
+ bool UsesZeroVector = false;
for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue Op = N->getOperand(i);
// Ignore undef inputs.
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+
+ // See if we can combine this build_vector into a blend with a zero vector.
+ if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op.getNode())->isNullValue()) ||
+ (Op.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
+ UsesZeroVector = true;
+ continue;
+ }
// If this input is something other than a EXTRACT_VECTOR_ELT with a
// constant index, bail out.
- if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Op.getOperand(1))) {
VecIn1 = VecIn2 = SDValue(nullptr, 0);
break;
}
// We allow up to two distinct input vectors.
- SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+ SDValue ExtractedFromVec = Op.getOperand(0);
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
if (!VecIn1.getNode()) {
VecIn1 = ExtractedFromVec;
- } else if (!VecIn2.getNode()) {
+ } else if (!VecIn2.getNode() && !UsesZeroVector) {
VecIn2 = ExtractedFromVec;
} else {
// Too many inputs.
@@ -10589,16 +10600,26 @@ SDValue DAGCombiner::visitBUILD_VECTOR(S
if (VecIn1.getNode()) {
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ unsigned Opcode = N->getOperand(i).getOpcode();
+ if (Opcode == ISD::UNDEF) {
Mask.push_back(-1);
continue;
}
+ // Operands can also be zero.
+ if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
+ assert(UsesZeroVector &&
+ (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
+ "Unexpected node found!");
+ Mask.push_back(NumInScalars+i);
+ continue;
+ }
+
// If extracting from the first vector, just use the index directly.
SDValue Extract = N->getOperand(i);
SDValue ExtVal = Extract.getOperand(1);
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
if (Extract.getOperand(0) == VecIn1) {
- unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
if (ExtIndex > VT.getVectorNumElements())
return SDValue();
@@ -10607,10 +10628,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(S
}
// Otherwise, use InIdx + VecSize
- unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
- Mask.push_back(Idx+NumInScalars);
+ Mask.push_back(NumInScalars+ExtIndex);
}
+ // Avoid introducing illegal shuffles with zero.
+ if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
+ return SDValue();
+
// We can't generate a shuffle node with mismatched input and output types.
// Attempt to transform a single input vector to the correct type.
if ((VT != VecIn1.getValueType())) {
@@ -10634,8 +10658,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(S
VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
}
- // If VecIn2 is unused then change it to undef.
- VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ if (UsesZeroVector)
+ VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) :
+ DAG.getConstantFP(0.0, VT);
+ else
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
// Check that we were able to transform all incoming values to the same
// type.
Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=222536&r1=222535&r2=222536&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Fri Nov 21 08:32:06 2014
@@ -527,14 +527,14 @@ define <4 x float> @shuf_X00A(<4 x float
; X32: ## BB#0:
; X32-NEXT: xorps %xmm2, %xmm2
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: shuf_X00A:
; X64: ## BB#0:
; X64-NEXT: xorps %xmm2, %xmm2
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X64-NEXT: retq
%vecext = extractelement <4 x float> %x, i32 0
%vecinit = insertelement <4 x float> undef, float %vecext, i32 0
@@ -549,7 +549,7 @@ define <4 x float> @shuf_X00X(<4 x float
; X32: ## BB#0:
; X32-NEXT: xorps %xmm1, %xmm1
; X32-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],zero,zero,xmm0[0]
+; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
@@ -557,7 +557,7 @@ define <4 x float> @shuf_X00X(<4 x float
; X64: ## BB#0:
; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],zero,zero,xmm0[0]
+; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
%vecext = extractelement <4 x float> %x, i32 0
@@ -572,8 +572,8 @@ define <4 x float> @shuf_X0YC(<4 x float
; X32-LABEL: shuf_X0YC:
; X32: ## BB#0:
; X32-NEXT: xorps %xmm2, %xmm2
-; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
-; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],zero,xmm0[1],zero
+; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2,3]
+; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
; X32-NEXT: movaps %xmm2, %xmm0
; X32-NEXT: retl
@@ -581,8 +581,8 @@ define <4 x float> @shuf_X0YC(<4 x float
; X64-LABEL: shuf_X0YC:
; X64: ## BB#0:
; X64-NEXT: xorps %xmm2, %xmm2
-; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
-; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],zero,xmm0[1],zero
+; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2,3]
+; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
; X64-NEXT: movaps %xmm2, %xmm0
; X64-NEXT: retq
@@ -741,8 +741,7 @@ define <4 x i32> @i32_shuf_X00X(<4 x i32
define <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_X0YC:
; X32: ## BB#0:
-; X32-NEXT: xorps %xmm2, %xmm2
-; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
+; X32-NEXT: pmovzxdq %xmm0, %xmm2
; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
; X32-NEXT: movaps %xmm2, %xmm0
@@ -750,8 +749,7 @@ define <4 x i32> @i32_shuf_X0YC(<4 x i32
;
; X64-LABEL: i32_shuf_X0YC:
; X64: ## BB#0:
-; X64-NEXT: xorps %xmm2, %xmm2
-; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
+; X64-NEXT: pmovzxdq %xmm0, %xmm2
; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
; X64-NEXT: movaps %xmm2, %xmm0
@@ -1146,3 +1144,42 @@ entry:
%vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 3
ret <4 x float> %vecinit3
}
+
+define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
+; X32-LABEL: build_vector_to_shuffle_1:
+; X32: ## BB#0:
+; X32-NEXT: xorps %xmm1, %xmm1
+; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; X32-NEXT: retl
+;
+; X64-LABEL: build_vector_to_shuffle_1:
+; X64: ## BB#0:
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; X64-NEXT: retq
+entry:
+ %vecext = extractelement <4 x float> %A, i32 1
+ %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1
+ %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2
+ %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x float> %vecinit3
+}
+
+define <4 x float> @build_vector_to_shuffle_2(<4 x float> %A) {
+; X32-LABEL: build_vector_to_shuffle_2:
+; X32: ## BB#0:
+; X32-NEXT: xorps %xmm1, %xmm1
+; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; X32-NEXT: retl
+;
+; X64-LABEL: build_vector_to_shuffle_2:
+; X64: ## BB#0:
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; X64-NEXT: retq
+entry:
+ %vecext = extractelement <4 x float> %A, i32 1
+ %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1
+ %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2
+ ret <4 x float> %vecinit1
+}
More information about the llvm-commits mailing list