[llvm] r348282 - [X86][SSE] Add SimplifyDemandedBitsForTargetNode handling for MOVMSK
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 4 08:52:32 PST 2018
Author: rksimon
Date: Tue Dec 4 08:52:32 2018
New Revision: 348282
URL: http://llvm.org/viewvc/llvm-project?rev=348282&view=rev
Log:
[X86][SSE] Add SimplifyDemandedBitsForTargetNode handling for MOVMSK
Moves the existing SimplifyDemandedBits call out of combineMOVMSK and adds a SimplifyDemandedVectorElts call based on the sign bits we need.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/movmsk.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=348282&r1=348281&r2=348282&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 4 08:52:32 2018
@@ -32466,6 +32466,39 @@ bool X86TargetLowering::SimplifyDemanded
}
break;
}
+ case X86ISD::MOVMSK: {
+ SDValue Src = Op.getOperand(0);
+ MVT VT = Op.getSimpleValueType();
+ MVT SrcVT = Src.getSimpleValueType();
+ unsigned SrcBits = SrcVT.getScalarSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+
+ // If we don't need the sign bits at all just return zero.
+ if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+ // Only demand the vector elements of the sign bits we need.
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
+ return true;
+
+ Known.Zero = KnownZero.zextOrSelf(BitWidth);
+ Known.Zero.setHighBits(BitWidth - NumElts);
+
+ // MOVMSK only uses the MSB from each vector element.
+ KnownBits KnownSrc;
+ if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), KnownSrc, TLO,
+ Depth + 1))
+ return true;
+
+ if (KnownSrc.One[SrcBits - 1])
+ Known.One.setLowBits(NumElts);
+ else if (KnownSrc.Zero[SrcBits - 1])
+ Known.Zero.setLowBits(NumElts);
+ return false;
+ }
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -39566,10 +39599,11 @@ static SDValue combineMOVMSK(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue Src = N->getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = N->getSimpleValueType(0);
// Perform constant folding.
if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) {
- assert(N->getValueType(0) == MVT::i32 && "Unexpected result type");
+ assert(VT== MVT::i32 && "Unexpected result type");
APInt Imm(32, 0);
for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) {
SDValue In = Src.getOperand(Idx);
@@ -39577,7 +39611,7 @@ static SDValue combineMOVMSK(SDNode *N,
cast<ConstantSDNode>(In)->getAPIntValue().isNegative())
Imm.setBit(Idx);
}
- return DAG.getConstant(Imm, SDLoc(N), N->getValueType(0));
+ return DAG.getConstant(Imm, SDLoc(N), VT);
}
// Look through int->fp bitcasts that don't change the element width.
@@ -39587,11 +39621,10 @@ static SDValue combineMOVMSK(SDNode *N,
EVT(SrcVT).changeVectorElementTypeToInteger())
Src = Src.getOperand(0);
+ // Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
- // MOVMSK only uses the MSB from each vector element.
- APInt DemandedMask(APInt::getSignMask(SrcVT.getScalarSizeInBits()));
- if (TLI.SimplifyDemandedBits(Src, DemandedMask, DCI))
+ APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
// Combine (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)).
Modified: llvm/trunk/test/CodeGen/X86/movmsk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/movmsk.ll?rev=348282&r1=348281&r2=348282&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/movmsk.ll (original)
+++ llvm/trunk/test/CodeGen/X86/movmsk.ll Tue Dec 4 08:52:32 2018
@@ -134,11 +134,10 @@ define i32 @demandedbits_v16i8(<16 x i8>
ret i32 %2
}
-; TODO: Simplify demanded vector elts
+; Simplify demanded vector elts
define i32 @demandedelts_v4f32(<4 x float> %x) {
; CHECK-LABEL: demandedelts_v4f32:
; CHECK: ## %bb.0:
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
More information about the llvm-commits mailing list