[llvm] [X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (PR #100079)

Tue Jul 23 03:41:21 PDT 2024

================
@@ -644,6 +695,112 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
   return true;
 }
 
+bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI, // rewritten via FPTOSI
+                                      MachineRegisterInfo &MRI, // unused here
+                                      LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+  unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
+  const LLT s32 = LLT::scalar(32);
+  const LLT s64 = LLT::scalar(64);
+
+  // 16/32-bit result: FPTOSI into a scalar twice as wide, then truncate.
+  if (DstSizeInBits == 16 || DstSizeInBits == 32) {
+    auto Casted = MIRBuilder.buildFPTOSI(LLT::scalar(DstSizeInBits * 2), Src);
+    MIRBuilder.buildTrunc(Dst, Casted);
+    MI.eraseFromParent();
+    return true;
+  }
+  if (DstTy == s64) {
+    APInt TwoPExpInt = APInt::getSignMask(DstSizeInBits); // 2^63 for s64
+    APFloat TwoPExpFP(SrcTy == s32 ? APFloat::IEEEsingle()
+                                   : APFloat::IEEEdouble(),
+                      APInt::getZero(SrcTy.getSizeInBits()));
+    TwoPExpFP.convertFromAPInt(TwoPExpInt, /*IsSigned=*/false,
+                               APFloat::rmNearestTiesToEven);
+
+    // Src >= 2^Exp (Threshold): the result is FPTOSI(Src - 2^Exp) with its
+    // top bit set, which adds 2^Exp back. Src < 2^Exp: the And below zeroes
+    // FPTOSI(Src - 2^Exp), leaving FPTOSI(Src). NOTE(review): this assumes an
+    // out-of-range FPTOSI(Src) yields a value with the top bit set -- confirm.
+    auto Casted = MIRBuilder.buildFPTOSI(DstTy, Src); // direct conversion
+    auto Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+    auto FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+    auto ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub); // of Src - 2^Exp
+    auto Shift = MIRBuilder.buildConstant(DstTy, DstSizeInBits - 1);
+    auto ResHighBit = MIRBuilder.buildAShr(DstTy, Casted, Shift); // 0 or ~0
+    auto And = MIRBuilder.buildAnd(DstTy, ResHighBit, ResLowBits);
+    MIRBuilder.buildOr(Dst, And, Casted); // final value, defined into Dst
+    MI.eraseFromParent();
+    return true;
+  }
+  return false; // MI left in place: no case above matched
+}
+
+bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+  const LLT s16 = LLT::scalar(16);
+  const LLT s32 = LLT::scalar(32);
+  const LLT s64 = LLT::scalar(64);
+
+  // Simply reuse SITOFP when it is possible to widen the type
+  if (SrcTy == s16 || SrcTy == s32) {
+    const LLT WidenTy = LLT::scalar(SrcTy.getScalarSizeInBits() * 2);
+    auto Ext = MIRBuilder.buildZExt(WidenTy, Src);
+    MIRBuilder.buildSITOFP(Dst, Ext);
+    MI.eraseFromParent();
+    return true;
+  }
+  if (SrcTy == s64 && DstTy == s32) {
----------------
arsenm wrote:

This can go in the generic LegalizerHelper.

https://github.com/llvm/llvm-project/pull/100079