[llvm] r343865 - [SelectionDAG] allow undefs when matching splat constants
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 5 10:42:19 PDT 2018
Author: spatel
Date: Fri Oct 5 10:42:19 2018
New Revision: 343865
URL: http://llvm.org/viewvc/llvm-project?rev=343865&view=rev
Log:
[SelectionDAG] allow undefs when matching splat constants
This is then used to transform fsub with zero constant operands, including
splat vectors that have undef elements.
The integer part (isConstOrConstSplat) isn't used yet, but it is proposed for
use in D44548, so adding both enhancements here makes that patch simpler.
Modified:
llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/X86/vec_fneg.ll
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=343865&r1=343864&r2=343865&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Fri Oct 5 10:42:19 2018
@@ -1602,10 +1602,10 @@ SDValue peekThroughOneUseBitcasts(SDValu
bool isBitwiseNot(SDValue V);
/// Returns the SDNode if it is a constant splat BuildVector or constant int.
-ConstantSDNode *isConstOrConstSplat(SDValue N);
+ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false);
/// Returns the SDNode if it is a constant splat BuildVector or constant float.
-ConstantFPSDNode *isConstOrConstSplatFP(SDValue N);
+ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
class GlobalAddressSDNode : public SDNode {
friend class SelectionDAG;
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=343865&r1=343864&r2=343865&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Oct 5 10:42:19 2018
@@ -11011,8 +11011,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -11044,9 +11044,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N
return DAG.getConstantFP(0.0f, DL, VT);
}
- // (fsub 0, B) -> -B
+ // (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
- if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
+ if (N0CFP->isNegative() ||
+ (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=343865&r1=343864&r2=343865&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Fri Oct 5 10:42:19 2018
@@ -8211,7 +8211,7 @@ bool llvm::isBitwiseNot(SDValue V) {
return C && C->isAllOnesValue();
}
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
@@ -8220,9 +8220,7 @@ ConstantSDNode *llvm::isConstOrConstSpla
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
- // FIXME: We blindly ignore splats which include undef which is overly
- // pessimistic.
- if (CN && UndefElements.none() &&
+ if (CN && (UndefElements.none() || AllowUndefs) &&
CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
@@ -8230,15 +8228,14 @@ ConstantSDNode *llvm::isConstOrConstSpla
return nullptr;
}
-ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
-
- if (CN && UndefElements.none())
+ if (CN && (UndefElements.none() || AllowUndefs))
return CN;
}
Modified: llvm/trunk/test/CodeGen/X86/vec_fneg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fneg.ll?rev=343865&r1=343864&r2=343865&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fneg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fneg.ll Fri Oct 5 10:42:19 2018
@@ -102,16 +102,12 @@ define <2 x float> @fneg_bitcast(i64 %i)
define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) {
; X32-SSE-LABEL: fneg_undef_elts_v4f32:
; X32-SSE: # %bb.0:
-; X32-SSE-NEXT: movaps {{.*#+}} xmm1 = <-0,u,u,-0>
-; X32-SSE-NEXT: subps %xmm0, %xmm1
-; X32-SSE-NEXT: movaps %xmm1, %xmm0
+; X32-SSE-NEXT: xorps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: fneg_undef_elts_v4f32:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = <-0,u,u,-0>
-; X64-SSE-NEXT: subps %xmm0, %xmm1
-; X64-SSE-NEXT: movaps %xmm1, %xmm0
+; X64-SSE-NEXT: xorps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: retq
%r = fsub <4 x float> <float -0.0, float undef, float undef, float -0.0>, %x
ret <4 x float> %r
@@ -120,25 +116,13 @@ define <4 x float> @fneg_undef_elts_v4f3
; This isn't fneg, but similarly check that (X - 0.0) is simplified.
define <4 x float> @fsub0_undef_elts_v4f32(<4 x float> %x) {
-; X32-SSE1-LABEL: fsub0_undef_elts_v4f32:
-; X32-SSE1: # %bb.0:
-; X32-SSE1-NEXT: retl
-;
-; X32-SSE2-LABEL: fsub0_undef_elts_v4f32:
-; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: xorps %xmm1, %xmm1
-; X32-SSE2-NEXT: subps %xmm1, %xmm0
-; X32-SSE2-NEXT: retl
-;
-; X64-SSE1-LABEL: fsub0_undef_elts_v4f32:
-; X64-SSE1: # %bb.0:
-; X64-SSE1-NEXT: retq
+; X32-SSE-LABEL: fsub0_undef_elts_v4f32:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: retl
;
-; X64-SSE2-LABEL: fsub0_undef_elts_v4f32:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: xorps %xmm1, %xmm1
-; X64-SSE2-NEXT: subps %xmm1, %xmm0
-; X64-SSE2-NEXT: retq
+; X64-SSE-LABEL: fsub0_undef_elts_v4f32:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: retq
%r = fsub <4 x float> %x, <float 0.0, float undef, float 0.0, float undef>
ret <4 x float> %r
}
More information about the llvm-commits mailing list