[llvm] [X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (PR #100079)
Evgenii Kudriashov via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 15:51:01 PDT 2024
================
@@ -644,6 +691,77 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
return true;
}
+bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const { // Custom legalization of an FP-to-unsigned-int conversion; returns true if MI was replaced and erased, false if the type combination is not handled here. MRI is not referenced in this body.
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); // Destination/source registers of MI together with their LLTs.
+ unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ // Destinations of at most 32 bits: reuse the signed conversion (FPTOSI) into
+ if (DstSizeInBits <= 32) { // a wider integer type and truncate the result back to the destination type.
+ auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src); // s32 destination converts through s64; any narrower destination converts through s32.
+ MIRBuilder.buildTrunc(Dst, Casted);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (DstTy == s64) {
+ APInt TwoPExpInt = APInt::getSignMask(DstSizeInBits); // Only the top bit set, i.e. 2^(DstSizeInBits - 1).
+ APFloat TwoPExpFP(SrcTy == s32 ? APFloat::IEEEsingle() // Float semantics follow the source type:
+ : APFloat::IEEEdouble(), // single for an s32 source, double otherwise.
+ APInt::getZero(SrcTy.getSizeInBits()));
+ TwoPExpFP.convertFromAPInt(TwoPExpInt, /*IsSigned=*/false, // Threshold 2^Exp expressed as a
+ APFloat::rmNearestTiesToEven); // floating-point constant of the source type.
+
+ // For an fp Src greater than or equal to Threshold (2^Exp), we use FPTOSI on
+ // (Src - 2^Exp) and add 2^Exp back by setting the highest bit of the result.
+ // For a smaller fp Src, the FPTOSI of (Src - 2^Exp) is zeroed by the And, so
+ // the final result is simply FPTOSI on Src.
+ auto Casted = MIRBuilder.buildFPTOSI(DstTy, Src);
+ auto Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+ auto FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+ auto ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
+ auto Shift = MIRBuilder.buildConstant(DstTy, DstSizeInBits - 1);
+ auto ResHighBit = MIRBuilder.buildAShr(DstTy, Casted, Shift); // Arithmetic shift by (size - 1) spreads Casted's sign bit over every bit: an all-ones or all-zero mask.
+ auto And = MIRBuilder.buildAnd(DstTy, ResHighBit, ResLowBits); // Keeps ResLowBits only when the mask is all ones.
+ MIRBuilder.buildOr(Dst, And, Casted); // Combines the masked low bits with Casted to form the final value in Dst.
+ MI.eraseFromParent();
+ return true;
+ }
+ return false; // Any other destination type is not handled by this routine.
+}
+
+bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ // Simply reuse SITOFP when it is possible to widen the type
+ if (SrcTy.getSizeInBits() <= 32) {
+ auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src);
+ MIRBuilder.buildSITOFP(Dst, Ext);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (SrcTy == s64 && DstTy == s32)
+ return Helper.lowerU64ToF32WithSITOFP(MI) !=
+ LegalizerHelper::LegalizeResult::UnableToLegalize;
+
+ if (SrcTy == s64 && DstTy == s64)
+ // TODO: rewrite with vector shuffles when supported.
+ return Helper.lowerU64ToF64BitFloatOps(MI) !=
+ LegalizerHelper::LegalizeResult::UnableToLegalize;
----------------
e-kud wrote:
Unfortunately no, we still don't distinguish between smaller types promoted to `s32` and an original `s32`, because after widening `s8->s32` we restart the rule iteration and have no way of knowing whether we should widen the type to `s64` or not.
https://github.com/llvm/llvm-project/pull/100079
More information about the llvm-commits
mailing list