[llvm] [X86] Try Folding icmp of v8i32 -> fcmp of v8f32 on AVX (PR #82290)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 26 16:00:58 PDT 2024
================
@@ -23391,6 +23391,136 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
}
+ // We get bad codegen for v8i32 compares on avx targets (without avx2) so if
+ // possible convert to a v8f32 compare.
+ if (VTOp0 == MVT::v8i32 && Subtarget.hasAVX() && !Subtarget.hasAVX2()) {
+ std::optional<KnownBits> KnownOps[2];
+ // Check if an op is known to be in a certain range.
+ auto OpInRange = [&DAG, Op, &KnownOps](unsigned OpNo, bool CmpLT,
+ const APInt Bound) {
+ if (!KnownOps[OpNo].has_value())
+ KnownOps[OpNo] = DAG.computeKnownBits(Op.getOperand(OpNo));
+
+ if (KnownOps[OpNo]->isUnknown())
+ return false;
+
+ std::optional<bool> Res;
+ if (CmpLT)
+ Res = KnownBits::ult(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
+ else
+ Res = KnownBits::ugt(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
+ return Res.value_or(false);
+ };
+
+ bool OkayCvt = false;
+ bool OkayBitcast = false;
+
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(MVT::f32);
+
+ // For cvt up to 1 << (Significand Precision), (1 << 24 for ieee float)
+ const APInt MaxConvertableCvt =
+ APInt::getOneBitSet(32, APFloat::semanticsPrecision(Sem));
+ // For bitcast up to (and including) first inf representation (0x7f800000 +
+ // 1 for ieee float)
+ const APInt MaxConvertableBitcast =
+ APFloat::getInf(Sem).bitcastToAPInt() + 1;
+ // For bitcast we also exclude de-norm values. This isn't absolutely
+ // necessary for strict semantic correctness, but DAZ (de-norm as zero) will
+ // break if we don't have this check.
+ const APInt MinConvertableBitcast =
+ APFloat::getSmallestNormalized(Sem).bitcastToAPInt() - 1;
+
+ assert(
+ MaxConvertableBitcast.getBitWidth() == 32 &&
+ MaxConvertableCvt == (1U << 24) &&
+ MaxConvertableBitcast == 0x7f800001 &&
+ MinConvertableBitcast.isNonNegative() &&
+ MaxConvertableBitcast.sgt(MinConvertableBitcast) &&
+ "This transform has only been verified to IEEE Single Precision Float");
+
+ // For bitcast we need both lhs/rhs u< MaxConvertableBitcast
+ // NB: It might be worth it to enable the bitcast version for unsigned avx2
+ // comparisons as they typically require multiple instructions to lower
+ // (they don't fit `vpcmpeq`/`vpcmpgt` well).
+ if (OpInRange(1, /*CmpLT*/ true, MaxConvertableBitcast) &&
+ OpInRange(1, /*CmpLT*/ false, MinConvertableBitcast) &&
+ OpInRange(0, /*CmpLT*/ true, MaxConvertableBitcast) &&
+ OpInRange(0, /*CmpLT*/ false, MinConvertableBitcast)) {
+ OkayBitcast = true;
+ }
+ // We want to convert icmp -> fcmp using `sitofp` iff one of the converts
+ // will be constant folded.
+ else if ((DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op1)) ||
+ DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op0)))) {
+ if (isUnsignedIntSetCC(Cond)) {
+ // For cvt + unsigned compare we need both lhs/rhs >= 0 and either lhs
+ // or rhs < MaxConvertableCvt
+
+ if (OpInRange(1, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
+ OpInRange(0, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
----------------
RKSimon wrote:
@goldsteinn The comment says both lhs/rhs >= 0, but the comparisons are against MIN_INT?
https://github.com/llvm/llvm-project/pull/82290
More information about the llvm-commits mailing list