[llvm] r294924 - [TargetLowering] fix SETCC SETLT folding with FP types
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 12 15:07:52 PST 2017
Author: spatel
Date: Sun Feb 12 17:07:52 2017
New Revision: 294924
URL: http://llvm.org/viewvc/llvm-project?rev=294924&view=rev
Log:
[TargetLowering] fix SETCC SETLT folding with FP types
The bug was introduced with:
https://reviews.llvm.org/rL294863
...and manifests as an instruction-selection failure on x86, but that
failure is actually a separate bug. This fix prevents wrong codegen with
-0.0, but in the more common case where we have the no-signed-zeros (NSZ)
and no-NaNs (NNAN) flags (-ffast-math), we should still be able to fold
this setcc/compare.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/test/CodeGen/X86/vselect-pcmp.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=294924&r1=294923&r2=294924&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Sun Feb 12 17:07:52 2017
@@ -751,25 +751,29 @@ bool TargetLowering::SimplifyDemandedBit
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
- case ISD::SETCC:
+ case ISD::SETCC: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
- if (NewMask.isSignBit() &&
- Op.getOperand(0).getScalarValueSizeInBits() == BitWidth &&
+ if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- // If we're testing if X < 0, then this compare isn't needed - just use X!
- if (CC == ISD::SETLT &&
- (isNullConstant(Op.getOperand(1)) ||
- ISD::isBuildVectorAllZeros(Op.getOperand(1).getNode())))
- return TLO.CombineTo(Op, Op.getOperand(0));
+ // If we're testing X < 0, then this compare isn't needed - just use X!
+ // FIXME: We're limiting to integer types here, but this should also work
+ // if we don't care about FP signed-zero. The use of SETLT with FP means
+ // that we don't care about NaNs.
+ if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+ (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+ return TLO.CombineTo(Op, Op0);
// TODO: Should we check for other forms of sign-bit comparisons?
// Examples: X <= -1, X >= 0
}
break;
+ }
case ISD::SHL:
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned ShAmt = SA->getZExtValue();
Modified: llvm/trunk/test/CodeGen/X86/vselect-pcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect-pcmp.ll?rev=294924&r1=294923&r2=294924&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-pcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect-pcmp.ll Sun Feb 12 17:07:52 2017
@@ -317,3 +317,27 @@ define <8 x double> @signbit_sel_v8f64(<
ret <8 x double> %z
}
+; If we have a floating-point compare:
+; (1) Don't die.
+; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.
+
+define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
+; AVX12F-LABEL: signbit_sel_v4f32_fcmp:
+; AVX12F: # BB#0:
+; AVX12F-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX12F-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
+; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX12F-NEXT: retq
+;
+; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1
+; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: retq
+ %cmp = fcmp olt <4 x float> %x, zeroinitializer
+ %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
+ ret <4 x float> %sel
+}
+
+attributes #0 = { "no-nans-fp-math"="true" }
More information about the llvm-commits mailing list