[llvm] [X86] Try Folding icmp of v8i32 -> fcmp of v8f32 on AVX (PR #82290)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 19 19:35:33 PST 2024
================
@@ -23299,6 +23299,110 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
}
+ // We get bad codegen for v8i32 compares on avx targets (without avx2) so if
+ // possible convert to a v8f32 compare.
+ if (VTOp0.getVectorElementType() == MVT::i32 && VTOp0 == MVT::v8i32 &&
+ Subtarget.hasAVX() && !Subtarget.hasAVX2()) {
+ std::optional<KnownBits> KnownOps[2];
+ // Check if an op is known to be in a certain range.
+ auto OpInRange = [&DAG, Op, &KnownOps](unsigned OpNo, bool CmpLT,
+ const APInt Bound) {
+ if (!KnownOps[OpNo].has_value())
+ KnownOps[OpNo] = DAG.computeKnownBits(Op.getOperand(OpNo));
+
+ if (KnownOps[OpNo]->isUnknown())
+ return false;
+
+ std::optional<bool> Res;
+ if (CmpLT)
+ Res = KnownBits::ult(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
+ else
+ Res = KnownBits::ugt(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
+ return Res.has_value() && *Res;
+ };
+
+ bool OkayCvt = false;
+ bool OkayBitcast = false;
+
+ // For cvt up to 1 << (Significand Precision)
+ const APInt MaxConvertableCvt = APInt(32, (1U << 24));
+ // For bitcast up to (and including) first inf representation (0x7f800000)
+ const APInt MaxConvertableBitcast = APInt(32, 0x7f800001);
+
+ // For bitcast we need both lhs/rhs u< MaxConvertableBitcast
+ // NB: It might be worth it to enable the bitcast version for unsigned avx2
+ // comparisons as they typically require multiple instructions to lower
+ // (they don't fit `vpcmpeq`/`vpcmpgt` well).
+ if (OpInRange(1, /*CmpLT*/ true, MaxConvertableBitcast) &&
+ OpInRange(0, /*CmpLT*/ true, MaxConvertableBitcast)) {
+ OkayBitcast = true;
+ }
+ // We want to convert icmp -> fcmp using `sitofp` iff one of the converts
+ // will be constant folded.
+ else if ((DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op1)) ||
+ DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op0)))) {
+ if (isUnsignedIntSetCC(Cond)) {
+ // For cvt + unsigned compare we need both lhs/rhs >= 0 and either lhs
+ // or rhs < MaxConvertableCvt
+
+ if (OpInRange(1, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
+ OpInRange(0, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
+ (OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
+ OpInRange(0, /*CmpLT*/ true, MaxConvertableCvt)))
+ OkayCvt = true;
+ } else {
+ // For cvt + signed compare we need abs(lhs) or abs(rhs) <
+ // MaxConvertableCvt
+ if (OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
----------------
goldsteinn wrote:
It's `||`.
The proof is something like:
```
; Source pattern: a plain integer equality compare of %x and %C, evaluated
; under the precondition that abs(%C) u< 16777216 OR abs(%x) u< 16777216
; (16777216 == 1 << 24).
define i1 @src_eq(i32 %x, i32 %C) {
%C_abs = call i32 @llvm.abs.i32(i32 %C, i1 false)
%X_abs = call i32 @llvm.abs.i32(i32 %x, i1 false)
%C_lemma = icmp ult i32 %C_abs, 16777216
%X_lemma = icmp ult i32 %X_abs, 16777216
; The two range checks are or'ed: only one operand has to be in range.
%lemma = or i1 %C_lemma, %X_lemma
call void @llvm.assume(i1 %lemma)
%cmp = icmp eq i32 %x, %C
ret i1 %cmp
}
; Target pattern: assumes the same or'ed range precondition as @src_eq, then
; converts both operands with sitofp and compares them with fcmp oeq instead
; of icmp eq.
define i1 @tgt_eq(i32 %x, i32 %C) {
%C_abs = call i32 @llvm.abs.i32(i32 %C, i1 false)
%X_abs = call i32 @llvm.abs.i32(i32 %x, i1 false)
%C_lemma = icmp ult i32 %C_abs, 16777216
%X_lemma = icmp ult i32 %X_abs, 16777216
; The two range checks are or'ed: only one operand has to be in range.
%lemma = or i1 %C_lemma, %X_lemma
call void @llvm.assume(i1 %lemma)
; Signed int -> float conversion of both compare operands.
%CFp = sitofp i32 %C to float
%conv = sitofp i32 %x to float
; Ordered floating-point equality replaces the integer equality.
%cmp = fcmp oeq float %conv, %CFp
ret i1 %cmp
}
```
The condition:
```
%C_abs = call i32 @llvm.abs.i32(i32 %C, i1 false)
%X_abs = call i32 @llvm.abs.i32(i32 %x, i1 false)
%C_lemma = icmp ult i32 %C_abs, 16777216
%X_lemma = icmp ult i32 %X_abs, 16777216
%lemma = or i1 %C_lemma, %X_lemma
call void @llvm.assume(i1 %lemma)
```
is all `||`.
https://github.com/llvm/llvm-project/pull/82290
More information about the llvm-commits
mailing list