[llvm] [X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (PR #100079)

Tue Jul 30 15:51:01 PDT 2024

================
@@ -644,6 +691,77 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
   return true;
 }
 
+bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+  unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
+  const LLT s32 = LLT::scalar(32);
+  const LLT s64 = LLT::scalar(64);
+
+  // Simply reuse FPTOSI when it is possible to widen the type
+  if (DstSizeInBits <= 32) {
+    auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src);
+    MIRBuilder.buildTrunc(Dst, Casted);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  if (DstTy == s64) {
+    APInt TwoPExpInt = APInt::getSignMask(DstSizeInBits);
+    APFloat TwoPExpFP(SrcTy == s32 ? APFloat::IEEEsingle()
+                                   : APFloat::IEEEdouble(),
+                      APInt::getZero(SrcTy.getSizeInBits()));
+    TwoPExpFP.convertFromAPInt(TwoPExpInt, /*IsSigned=*/false,
+                               APFloat::rmNearestTiesToEven);
+
+    // For fp Src greater or equal to Threshold(2^Exp), we use FPTOSI on
+    // (Src - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
+    // For fp Src smaller, (Src - 2^Exp) is zeroed by And, the final result
+    // is FPTOSI on Src.
+    auto Casted = MIRBuilder.buildFPTOSI(DstTy, Src);
+    auto Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+    auto FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+    auto ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
+    auto Shift = MIRBuilder.buildConstant(DstTy, DstSizeInBits - 1);
+    auto ResHighBit = MIRBuilder.buildAShr(DstTy, Casted, Shift);
+    auto And = MIRBuilder.buildAnd(DstTy, ResHighBit, ResLowBits);
+    MIRBuilder.buildOr(Dst, And, Casted);
+    MI.eraseFromParent();
+    return true;
+  }
+  return false;
+}
+
+bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+  const LLT s32 = LLT::scalar(32);
+  const LLT s64 = LLT::scalar(64);
+
+  // Simply reuse SITOFP when it is possible to widen the type
+  if (SrcTy.getSizeInBits() <= 32) {
+    auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src);
+    MIRBuilder.buildSITOFP(Dst, Ext);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  if (SrcTy == s64 && DstTy == s32)
+    return Helper.lowerU64ToF32WithSITOFP(MI) !=
+           LegalizerHelper::LegalizeResult::UnableToLegalize;
+
+  if (SrcTy == s64 && DstTy == s64)
+    // TODO: rewrite with vector shuffles when supported.
+    return Helper.lowerU64ToF64BitFloatOps(MI) !=
+           LegalizerHelper::LegalizeResult::UnableToLegalize;
----------------
e-kud wrote:

Unfortunately no, we still don't distinguish between smaller types promoted to `s32` and original `s32` because after widening `s8->s32` we start rule iteration over and have no clue whether we should widen type to `s64` or not.

https://github.com/llvm/llvm-project/pull/100079