[llvm] [X86] Try Folding icmp of v8i32 -> fcmp of v8f32 on AVX (PR #82290)

Mon Feb 19 19:35:33 PST 2024

================
@@ -23299,6 +23299,110 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
     }
   }
 
+  // We get bad codegen for v8i32 compares on avx targets (without avx2) so if
+  // possible convert to a v8f32 compare.
+  if (VTOp0.getVectorElementType() == MVT::i32 && VTOp0 == MVT::v8i32 &&
+      Subtarget.hasAVX() && !Subtarget.hasAVX2()) {
+    std::optional<KnownBits> KnownOps[2];
+    // Check if an op is known to be in a certain range.
+    auto OpInRange = [&DAG, Op, &KnownOps](unsigned OpNo, bool CmpLT,
+                                           const APInt Bound) {
+      if (!KnownOps[OpNo].has_value())
+        KnownOps[OpNo] = DAG.computeKnownBits(Op.getOperand(OpNo));
+
+      if (KnownOps[OpNo]->isUnknown())
+        return false;
+
+      std::optional<bool> Res;
+      if (CmpLT)
+        Res = KnownBits::ult(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
+      else
+        Res = KnownBits::ugt(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
+      return Res.has_value() && *Res;
+    };
+
+    bool OkayCvt = false;
+    bool OkayBitcast = false;
+
+    // For cvt up to 1 << (Significand Precision)
+    const APInt MaxConvertableCvt = APInt(32, (1U << 24));
+    // For bitcast up to (and including) first inf representation (0x7f800000)
+    const APInt MaxConvertableBitcast = APInt(32, 0x7f800001);
+
+    // For bitcast we need both lhs/op1 u< MaxConvertableBitcast
+    // NB: It might be worth it to enable to bitcast version for unsigned avx2
+    // comparisons as they typically require multiple instructions to lower
+    // (they don't fit `vpcmpeq`/`vpcmpgt` well).
+    if (OpInRange(1, /*CmpLT*/ true, MaxConvertableBitcast) &&
+        OpInRange(0, /*CmpLT*/ true, MaxConvertableBitcast)) {
+      OkayBitcast = true;
+    }
+    // We want to convert icmp -> fcmp using `sitofp` iff one of the converts
+    // will be constant folded.
+    else if ((DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op1)) ||
+              DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op0)))) {
+      if (isUnsignedIntSetCC(Cond)) {
+        // For cvt + unsigned compare we need both lhs/rhs >= 0 and either lhs
+        // or rhs < MaxConvertableCvt
+
+        if (OpInRange(1, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
+            OpInRange(0, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
+            (OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
+             OpInRange(0, /*CmpLT*/ true, MaxConvertableCvt)))
+          OkayCvt = true;
+      } else {
+        // For cvt + signed compare we need  abs(lhs) or abs(rhs) <
+        // MaxConvertableCvt
+        if (OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
----------------
goldsteinn wrote:

It's `||`.
The proof is something like:
```
; Source side of the proof: the plain integer equality compare of %x and %C.
; The assume limits the inputs to those where at least one of abs(%x) or
; abs(%C) is unsigned-less-than 16777216 (1 << 24).
define i1 @src_eq(i32 %x, i32 %C) {
  %C_abs = call i32 @llvm.abs.i32(i32 %C, i1 false)
  %X_abs = call i32 @llvm.abs.i32(i32 %x, i1 false)
  %C_lemma = icmp ult i32 %C_abs, 16777216
  %X_lemma = icmp ult i32 %X_abs, 16777216
  ; The precondition is an OR: only one of the two operands has to be in range.
  %lemma = or i1 %C_lemma, %X_lemma
  call void @llvm.assume(i1 %lemma)

  ; The compare being replaced.
  %cmp = icmp eq i32 %x, %C
  ret i1 %cmp
}

; Target side of the proof: both operands are converted with sitofp and then
; compared with an ordered float equality. The assume is the same OR of the
; two range checks (abs(value) u< 16777216, i.e. 1 << 24).
define i1 @tgt_eq(i32 %x, i32 %C) {
  %C_abs = call i32 @llvm.abs.i32(i32 %C, i1 false)
  %X_abs = call i32 @llvm.abs.i32(i32 %x, i1 false)
  %C_lemma = icmp ult i32 %C_abs, 16777216
  %X_lemma = icmp ult i32 %X_abs, 16777216
  ; The precondition is an OR: only one of the two operands has to be in range.
  %lemma = or i1 %C_lemma, %X_lemma
  call void @llvm.assume(i1 %lemma)

  ; Signed int -> float conversion of each operand, then the float compare.
  %CFp = sitofp i32 %C to float
  %conv = sitofp i32 %x to float
  %cmp = fcmp oeq float %conv, %CFp
  ret i1 %cmp
}

```

The condition:
```
  %C_abs = call i32 @llvm.abs.i32(i32 %C, i1 false)
  %X_abs = call i32 @llvm.abs.i32(i32 %x, i1 false)
  %C_lemma = icmp ult i32 %C_abs, 16777216
  %X_lemma = icmp ult i32 %X_abs, 16777216
  %lemma = or i1 %C_lemma, %X_lemma
  call void @llvm.assume(i1 %lemma)

```
is all `||`: only one of the two operands needs to be within range.

https://github.com/llvm/llvm-project/pull/82290