[llvm] [X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (PR #100079)
Evgenii Kudriashov via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 10:48:18 PDT 2024
================
@@ -644,6 +695,112 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
return true;
}
+bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI, // Rewrites the FP-to-unsigned conversion MI in terms of FPTOSI.
+ MachineRegisterInfo &MRI, // Not referenced in this function body.
+ LegalizerHelper &Helper) const { // Returns true if MI was replaced and erased, false if no case below matched.
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); // Destination/source registers and their LLTs.
+ unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ // 16/32-bit results: reuse FPTOSI into a signed type twice as wide, then truncate to Dst.
+ if (DstSizeInBits == 16 || DstSizeInBits == 32) {
+ auto Casted = MIRBuilder.buildFPTOSI(LLT::scalar(DstSizeInBits * 2), Src); // The doubled width covers the whole unsigned range of Dst.
+ MIRBuilder.buildTrunc(Dst, Casted);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (DstTy == s64) { // 64-bit result: this path does not widen; it uses the threshold sequence below.
+ APInt TwoPExpInt = APInt::getSignMask(DstSizeInBits); // Only the top bit set, i.e. 2^(DstSizeInBits - 1).
+ APFloat TwoPExpFP(SrcTy == s32 ? APFloat::IEEEsingle() // IEEE single for an s32 source, IEEE double otherwise.
+ : APFloat::IEEEdouble(),
+ APInt::getZero(SrcTy.getSizeInBits())); // Starts from an all-zero bit pattern.
+ TwoPExpFP.convertFromAPInt(TwoPExpInt, /*IsSigned=*/false, // Threshold as an FP value, converted as unsigned.
+ APFloat::rmNearestTiesToEven);
+
+ // For an fp Src greater than or equal to Threshold (2^Exp), we use FPTOSI on
+ // (Src - 2^Exp) and add 2^Exp back by setting the highest bit of the result.
+ // For a smaller fp Src, the FPTOSI of (Src - 2^Exp) is masked to zero by the
+ // And, so the final result is simply FPTOSI on Src.
+ auto Casted = MIRBuilder.buildFPTOSI(DstTy, Src); // NOTE(review): the sequence relies on this having its sign bit set when Src >= Threshold -- confirm for the target's FPTOSI.
+ auto Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+ auto FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+ auto ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
+ auto Shift = MIRBuilder.buildConstant(DstTy, DstSizeInBits - 1);
+ auto ResHighBit = MIRBuilder.buildAShr(DstTy, Casted, Shift); // Arithmetic shift by width-1 copies Casted's sign bit into every bit.
+ auto And = MIRBuilder.buildAnd(DstTy, ResHighBit, ResLowBits); // Keeps ResLowBits only when that mask is all ones.
+ MIRBuilder.buildOr(Dst, And, Casted); // Merges the masked low bits with Casted to form the final value in Dst.
+ MI.eraseFromParent();
+ return true;
+ }
+ return false; // Any other destination type is left unhandled by this function.
+}
+
+bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ // Simply reuse SITOFP when it is possible to widen the type
+ if (SrcTy == s16 || SrcTy == s32) {
+ const LLT WidenTy = LLT::scalar(SrcTy.getScalarSizeInBits() * 2);
+ auto Ext = MIRBuilder.buildZExt(WidenTy, Src);
+ MIRBuilder.buildSITOFP(Dst, Ext);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (SrcTy == s64 && DstTy == s32) {
+ // For i64 < INT_MAX we simply reuse SITOFP.
+ // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
+ // saved before division, convert to float by SITOFP, multiply the result
+ // by 2.
+ auto SmallResult = MIRBuilder.buildSITOFP(DstTy, Src);
+ auto One = MIRBuilder.buildConstant(SrcTy, 1);
+ auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
+ auto Halved = MIRBuilder.buildLShr(SrcTy, Src, One);
+ auto LowerBit = MIRBuilder.buildAnd(SrcTy, Src, One);
+ auto RoundedHalved = MIRBuilder.buildOr(SrcTy, Halved, LowerBit);
+ auto HalvedFP = MIRBuilder.buildSITOFP(DstTy, RoundedHalved);
+ auto LargeResult = MIRBuilder.buildFAdd(DstTy, HalvedFP, HalvedFP);
+ auto IsLarge = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT,
+ LLT::scalar(1), Src, Zero);
+ MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (SrcTy == s64 && DstTy == s64) {
----------------
e-kud wrote:
Agreed, I'll extract it into `lowerUITOFP`; it currently handles only s64 to s32.
https://github.com/llvm/llvm-project/pull/100079
More information about the llvm-commits
mailing list