[llvm] [X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (PR #100079)
Evgenii Kudriashov via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 04:29:22 PDT 2024
https://github.com/e-kud updated https://github.com/llvm/llvm-project/pull/100079
>From e9d7c68c9dbbedcd09ef269efd8f7aefc4dcd6ef Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Tue, 16 Jul 2024 08:13:08 -0700
Subject: [PATCH 1/5] [X86][GlobalISel] Enable scalar versions of G_UITOFP and
G_FPTOUI
Also add tests for G_SITOFP and G_FPTOSI
---
.../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 157 +++++++++
llvm/lib/Target/X86/GISel/X86LegalizerInfo.h | 6 +
.../Target/X86/GISel/X86RegisterBankInfo.cpp | 10 +-
llvm/test/CodeGen/X86/isel-fp-to-int.ll | 323 ++++++++++++++++++
llvm/test/CodeGen/X86/isel-int-to-fp.ll | 320 +++++++++++++++++
5 files changed, 812 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/isel-fp-to-int.ll
create mode 100644 llvm/test/CodeGen/X86/isel-int-to-fp.ll
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index dd8ecf6ef7fc76..0f1d3343630d76 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -497,6 +497,53 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
.clampScalar(0, s32, sMaxScalar)
.widenScalarToNextPow2(1);
+ // For G_UITOFP and G_FPTOUI without AVX512, we have to legalize s16 with a
+ // custom handler. Otherwise, inside the custom handler there is no way to
+ // tell whether s32 is the original type that must be promoted to s64, or
+ // s32 was obtained by widening s16 and must not be widened further to s64.
+ //
+ // For AVX512 we simply widen the types as there is a direct mapping from
+ // the opcodes to asm instructions.
+ getActionDefinitionsBuilder(G_UITOFP)
+ .legalIf([=](const LegalityQuery &Query) {
+ return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
+ typeInSet(1, {s32, s64})(Query);
+ })
+ .customIf([=](const LegalityQuery &Query) {
+ if (HasAVX512)
+ return false;
+ return (HasSSE1 &&
+ (typePairInSet(0, 1, {{s32, s32}, {s32, s16}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s32, s64}})(Query)))) ||
+ (HasSSE2 &&
+ (typePairInSet(0, 1, {{s64, s32}, {s64, s16}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
+ })
+ .clampScalar(1, HasAVX512 ? s32 : s16, sMaxScalar)
+ .widenScalarToNextPow2(1)
+ .clampScalar(0, s32, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(0);
+
+ getActionDefinitionsBuilder(G_FPTOUI)
+ .legalIf([=](const LegalityQuery &Query) {
+ return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
+ typeInSet(1, {s32, s64})(Query);
+ })
+ .customIf([=](const LegalityQuery &Query) {
+ if (HasAVX512)
+ return false;
+ return (HasSSE1 &&
+ (typePairInSet(0, 1, {{s32, s32}, {s16, s32}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s32}})(Query)))) ||
+ (HasSSE2 &&
+ (typePairInSet(0, 1, {{s32, s64}, {s16, s64}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
+ })
+ .clampScalar(1, s32, sMaxScalar)
+ .widenScalarToNextPow2(1)
+ .clampScalar(0, HasAVX512 ? s32 : s16, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(0);
+
// vector ops
getActionDefinitionsBuilder(G_BUILD_VECTOR)
.customIf([=](const LegalityQuery &Query) {
@@ -589,6 +636,10 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
return false;
case TargetOpcode::G_BUILD_VECTOR:
return legalizeBuildVector(MI, MRI, Helper);
+ case TargetOpcode::G_FPTOUI:
+ return legalizeFPTOUI(MI, MRI, Helper);
+ case TargetOpcode::G_UITOFP:
+ return legalizeUITOFP(MI, MRI, Helper);
}
llvm_unreachable("expected switch to return");
}
@@ -644,6 +695,112 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
return true;
}
+bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+ unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ // Simply reuse FPTOSI when it is possible to widen the type
+ if (DstSizeInBits == 16 || DstSizeInBits == 32) {
+ auto Casted = MIRBuilder.buildFPTOSI(LLT::scalar(DstSizeInBits * 2), Src);
+ MIRBuilder.buildTrunc(Dst, Casted);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (DstTy == s64) {
+ APInt TwoPExpInt = APInt::getSignMask(DstSizeInBits);
+ APFloat TwoPExpFP(SrcTy == s32 ? APFloat::IEEEsingle()
+ : APFloat::IEEEdouble(),
+ APInt::getZero(SrcTy.getSizeInBits()));
+ TwoPExpFP.convertFromAPInt(TwoPExpInt, /*IsSigned=*/false,
+ APFloat::rmNearestTiesToEven);
+
+ // For fp Src greater than or equal to the Threshold (2^Exp), we compute
+ // FPTOSI on (Src - 2^Exp) and restore 2^Exp by setting the highest bit
+ // of the result to 1. For fp Src below the Threshold, (Src - 2^Exp) is
+ // zeroed out by the And, and the final result is FPTOSI on Src.
+ auto Casted = MIRBuilder.buildFPTOSI(DstTy, Src);
+ auto Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+ auto FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+ auto ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
+ auto Shift = MIRBuilder.buildConstant(DstTy, DstSizeInBits - 1);
+ auto ResHighBit = MIRBuilder.buildAShr(DstTy, Casted, Shift);
+ auto And = MIRBuilder.buildAnd(DstTy, ResHighBit, ResLowBits);
+ MIRBuilder.buildOr(Dst, And, Casted);
+ MI.eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ // Simply reuse SITOFP when it is possible to widen the type
+ if (SrcTy == s16 || SrcTy == s32) {
+ const LLT WidenTy = LLT::scalar(SrcTy.getScalarSizeInBits() * 2);
+ auto Ext = MIRBuilder.buildZExt(WidenTy, Src);
+ MIRBuilder.buildSITOFP(Dst, Ext);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (SrcTy == s64 && DstTy == s32) {
+ // For i64 values not exceeding INT64_MAX we simply reuse SITOFP.
+ // Otherwise, divide i64 by 2, round the result by ORing it with the
+ // lowest bit saved before the division, convert to float by SITOFP, and
+ // multiply the result by 2.
+ auto SmallResult = MIRBuilder.buildSITOFP(DstTy, Src);
+ auto One = MIRBuilder.buildConstant(SrcTy, 1);
+ auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
+ auto Halved = MIRBuilder.buildLShr(SrcTy, Src, One);
+ auto LowerBit = MIRBuilder.buildAnd(SrcTy, Src, One);
+ auto RoundedHalved = MIRBuilder.buildOr(SrcTy, Halved, LowerBit);
+ auto HalvedFP = MIRBuilder.buildSITOFP(DstTy, RoundedHalved);
+ auto LargeResult = MIRBuilder.buildFAdd(DstTy, HalvedFP, HalvedFP);
+ auto IsLarge = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT,
+ LLT::scalar(1), Src, Zero);
+ MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (SrcTy == s64 && DstTy == s64) {
+ // TODO: rewrite on vector shuffles when supported.
+ // We create the double from two 32-bit parts whose exponents differ by 32.
+ //
+ // X = 2^52 * 1.0...LowBits
+ // Y = 2^84 * 1.0...HighBits
+ // Temp = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
+ // = - 2^52 * 1.0...HighBits
+ // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
+ auto TwoP52 = MIRBuilder.buildConstant(s64, UINT64_C(0x4330000000000000));
+ auto TwoP84 = MIRBuilder.buildConstant(s64, UINT64_C(0x4530000000000000));
+ auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
+ auto TwoP52P84FP = MIRBuilder.buildFConstant(s64, TwoP52P84);
+ auto HalfWidth = MIRBuilder.buildConstant(s64, 32);
+
+ auto LowBits = MIRBuilder.buildTrunc(s32, Src);
+ LowBits = MIRBuilder.buildZExt(s64, LowBits);
+ auto LowBitsFP = MIRBuilder.buildOr(s64, TwoP52, LowBits);
+ auto HighBits = MIRBuilder.buildLShr(s64, Src, HalfWidth);
+ auto HighBitsFP = MIRBuilder.buildOr(s64, TwoP84, HighBits);
+ auto Scratch = MIRBuilder.buildFSub(s64, HighBitsFP, TwoP52P84FP);
+ MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
+ MI.eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
return true;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 229a58986903d4..39bd9892e2f16e 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -39,6 +39,12 @@ class X86LegalizerInfo : public LegalizerInfo {
private:
bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
+
+ bool legalizeFPTOUI(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
+
+ bool legalizeUITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
};
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
index 9e85424e76e620..43d52fc0e5c3b4 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
@@ -323,7 +323,9 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx);
break;
case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_FPTOSI: {
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_UITOFP:
+ case TargetOpcode::G_FPTOUI: {
// Some of the floating-point instructions have mixed GPR and FP
// operands: fine-tune the computed mapping.
auto &Op0 = MI.getOperand(0);
@@ -331,10 +333,10 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const LLT Ty0 = MRI.getType(Op0.getReg());
const LLT Ty1 = MRI.getType(Op1.getReg());
- bool FirstArgIsFP = Opc == TargetOpcode::G_SITOFP;
- bool SecondArgIsFP = Opc == TargetOpcode::G_FPTOSI;
+ bool FirstArgIsFP =
+ Opc == TargetOpcode::G_SITOFP || Opc == TargetOpcode::G_UITOFP;
OpRegBankIdx[0] = getPartialMappingIdx(MI, Ty0, /* isFP= */ FirstArgIsFP);
- OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ SecondArgIsFP);
+ OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ !FirstArgIsFP);
break;
}
case TargetOpcode::G_FCMP: {
diff --git a/llvm/test/CodeGen/X86/isel-fp-to-int.ll b/llvm/test/CodeGen/X86/isel-fp-to-int.ll
new file mode 100644
index 00000000000000..0dcca65e06d4a1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fp-to-int.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,SDAG-X64
+; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,GISEL-X64
+; RUN: llc < %s -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,SDAG-AVX512
+; RUN: llc < %s -global-isel -global-isel-abort=1 -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,GISEL-AVX512
+
+define i64 @test_double_to_ui64(double %x) {
+; SDAG-X64-LABEL: test_double_to_ui64:
+; SDAG-X64: # %bb.0: # %entry
+; SDAG-X64-NEXT: cvttsd2si %xmm0, %rcx
+; SDAG-X64-NEXT: movq %rcx, %rdx
+; SDAG-X64-NEXT: sarq $63, %rdx
+; SDAG-X64-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SDAG-X64-NEXT: cvttsd2si %xmm0, %rax
+; SDAG-X64-NEXT: andq %rdx, %rax
+; SDAG-X64-NEXT: orq %rcx, %rax
+; SDAG-X64-NEXT: retq
+;
+; GISEL-X64-LABEL: test_double_to_ui64:
+; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-NEXT: cvttsd2si %xmm0, %rcx
+; GISEL-X64-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
+; GISEL-X64-NEXT: subsd %xmm1, %xmm0
+; GISEL-X64-NEXT: cvttsd2si %xmm0, %rdx
+; GISEL-X64-NEXT: movq %rcx, %rax
+; GISEL-X64-NEXT: sarq $63, %rax
+; GISEL-X64-NEXT: andq %rdx, %rax
+; GISEL-X64-NEXT: orq %rcx, %rax
+; GISEL-X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_ui64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptoui double %x to i64
+ ret i64 %conv
+}
+
+define i32 @test_double_to_ui32(double %x) {
+; X64-LABEL: test_double_to_ui32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_ui32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptoui double %x to i32
+ ret i32 %conv
+}
+
+define zeroext i16 @test_double_to_ui16(double %x) {
+; X64-LABEL: test_double_to_ui16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_double_to_ui16:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; SDAG-AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_double_to_ui16:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; GISEL-AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = fptoui double %x to i16
+ ret i16 %conv
+}
+
+define zeroext i8 @test_double_to_ui8(double %x) {
+; X64-LABEL: test_double_to_ui8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_double_to_ui8:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; SDAG-AVX512-NEXT: # kill: def $al killed $al killed $eax
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_double_to_ui8:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; GISEL-AVX512-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = fptoui double %x to i8
+ ret i8 %conv
+}
+
+define i64 @test_float_to_ui64(float %x) {
+; SDAG-X64-LABEL: test_float_to_ui64:
+; SDAG-X64: # %bb.0: # %entry
+; SDAG-X64-NEXT: cvttss2si %xmm0, %rcx
+; SDAG-X64-NEXT: movq %rcx, %rdx
+; SDAG-X64-NEXT: sarq $63, %rdx
+; SDAG-X64-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SDAG-X64-NEXT: cvttss2si %xmm0, %rax
+; SDAG-X64-NEXT: andq %rdx, %rax
+; SDAG-X64-NEXT: orq %rcx, %rax
+; SDAG-X64-NEXT: retq
+;
+; GISEL-X64-LABEL: test_float_to_ui64:
+; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-NEXT: cvttss2si %xmm0, %rcx
+; GISEL-X64-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
+; GISEL-X64-NEXT: subss %xmm1, %xmm0
+; GISEL-X64-NEXT: cvttss2si %xmm0, %rdx
+; GISEL-X64-NEXT: movq %rcx, %rax
+; GISEL-X64-NEXT: sarq $63, %rax
+; GISEL-X64-NEXT: andq %rdx, %rax
+; GISEL-X64-NEXT: orq %rcx, %rax
+; GISEL-X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_ui64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptoui float %x to i64
+ ret i64 %conv
+}
+
+define i32 @test_float_to_ui32(float %x) {
+; X64-LABEL: test_float_to_ui32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_ui32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptoui float %x to i32
+ ret i32 %conv
+}
+
+define zeroext i16 @test_float_to_ui16(float %x) {
+; X64-LABEL: test_float_to_ui16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_float_to_ui16:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvttss2si %xmm0, %eax
+; SDAG-AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_float_to_ui16:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; GISEL-AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = fptoui float %x to i16
+ ret i16 %conv
+}
+
+define zeroext i8 @test_float_to_ui8(float %x) {
+; X64-LABEL: test_float_to_ui8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_float_to_ui8:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvttss2si %xmm0, %eax
+; SDAG-AVX512-NEXT: # kill: def $al killed $al killed $eax
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_float_to_ui8:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; GISEL-AVX512-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = fptoui float %x to i8
+ ret i8 %conv
+}
+
+define i64 @test_double_to_si64(double %x) {
+; X64-LABEL: test_double_to_si64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %rax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_si64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2si %xmm0, %rax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi double %x to i64
+ ret i64 %conv
+}
+
+define i32 @test_double_to_si32(double %x) {
+; X64-LABEL: test_double_to_si32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_si32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi double %x to i32
+ ret i32 %conv
+}
+
+define signext i16 @test_double_to_si16(double %x) {
+; X64-LABEL: test_double_to_si16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_si16:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi double %x to i16
+ ret i16 %conv
+}
+
+define signext i8 @test_double_to_si8(double %x) {
+; X64-LABEL: test_double_to_si8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_si8:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi double %x to i8
+ ret i8 %conv
+}
+
+define i64 @test_float_to_si64(float %x) {
+; X64-LABEL: test_float_to_si64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %rax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_si64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2si %xmm0, %rax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi float %x to i64
+ ret i64 %conv
+}
+
+define i32 @test_float_to_si32(float %x) {
+; X64-LABEL: test_float_to_si32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_si32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi float %x to i32
+ ret i32 %conv
+}
+
+define signext i16 @test_float_to_si16(float %x) {
+; X64-LABEL: test_float_to_si16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_si16:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi float %x to i16
+ ret i16 %conv
+}
+
+define signext i8 @test_float_to_si8(float %x) {
+; X64-LABEL: test_float_to_si8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_si8:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi float %x to i8
+ ret i8 %conv
+}
diff --git a/llvm/test/CodeGen/X86/isel-int-to-fp.ll b/llvm/test/CodeGen/X86/isel-int-to-fp.ll
new file mode 100644
index 00000000000000..2f2f0f03719d46
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-int-to-fp.ll
@@ -0,0 +1,320 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,SDAG-X64
+; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,GISEL-X64
+; RUN: llc < %s -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,SDAG-AVX512
+; RUN: llc < %s -global-isel -global-isel-abort=1 -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,GISEL-AVX512
+
+define double @test_ui64_to_double(i64 %x) {
+; SDAG-X64-LABEL: test_ui64_to_double:
+; SDAG-X64: # %bb.0: # %entry
+; SDAG-X64-NEXT: movq %rdi, %xmm1
+; SDAG-X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; SDAG-X64-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SDAG-X64-NEXT: movapd %xmm1, %xmm0
+; SDAG-X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SDAG-X64-NEXT: addsd %xmm1, %xmm0
+; SDAG-X64-NEXT: retq
+;
+; GISEL-X64-LABEL: test_ui64_to_double:
+; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-NEXT: movabsq $4841369599423283200, %rax # imm = 0x4330000000000000
+; GISEL-X64-NEXT: movabsq $4985484787499139072, %rcx # imm = 0x4530000000000000
+; GISEL-X64-NEXT: movsd {{.*#+}} xmm0 = [1.9342813118337666E+25,0.0E+0]
+; GISEL-X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF
+; GISEL-X64-NEXT: andq %rdi, %rdx
+; GISEL-X64-NEXT: orq %rax, %rdx
+; GISEL-X64-NEXT: shrq $32, %rdi
+; GISEL-X64-NEXT: orq %rdi, %rcx
+; GISEL-X64-NEXT: movq %rcx, %xmm1
+; GISEL-X64-NEXT: subsd %xmm0, %xmm1
+; GISEL-X64-NEXT: movq %rdx, %xmm0
+; GISEL-X64-NEXT: addsd %xmm1, %xmm0
+; GISEL-X64-NEXT: retq
+;
+; AVX512-LABEL: test_ui64_to_double:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = uitofp i64 %x to double
+ ret double %conv
+}
+
+define double @test_ui32_to_double(i32 %x) {
+; X64-LABEL: test_ui32_to_double:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2sd %rax, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_ui32_to_double:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = uitofp i32 %x to double
+ ret double %conv
+}
+
+define double @test_ui16_to_double(i16 zeroext %x) {
+; X64-LABEL: test_ui16_to_double:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2sd %edi, %xmm0
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_ui16_to_double:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_ui16_to_double:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = uitofp i16 %x to double
+ ret double %conv
+}
+
+define double @test_ui8_to_double(i8 zeroext %x) {
+; X64-LABEL: test_ui8_to_double:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2sd %edi, %xmm0
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_ui8_to_double:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_ui8_to_double:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = uitofp i8 %x to double
+ ret double %conv
+}
+
+define float @test_ui64_to_float(i64 %x) {
+; SDAG-X64-LABEL: test_ui64_to_float:
+; SDAG-X64: # %bb.0: # %entry
+; SDAG-X64-NEXT: testq %rdi, %rdi
+; SDAG-X64-NEXT: js .LBB4_1
+; SDAG-X64-NEXT: # %bb.2: # %entry
+; SDAG-X64-NEXT: cvtsi2ss %rdi, %xmm0
+; SDAG-X64-NEXT: retq
+; SDAG-X64-NEXT: .LBB4_1:
+; SDAG-X64-NEXT: movq %rdi, %rax
+; SDAG-X64-NEXT: shrq %rax
+; SDAG-X64-NEXT: andl $1, %edi
+; SDAG-X64-NEXT: orq %rax, %rdi
+; SDAG-X64-NEXT: cvtsi2ss %rdi, %xmm0
+; SDAG-X64-NEXT: addss %xmm0, %xmm0
+; SDAG-X64-NEXT: retq
+;
+; GISEL-X64-LABEL: test_ui64_to_float:
+; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-NEXT: cvtsi2ss %rdi, %xmm0
+; GISEL-X64-NEXT: movq %rdi, %rax
+; GISEL-X64-NEXT: shrq %rax
+; GISEL-X64-NEXT: movq %rdi, %rcx
+; GISEL-X64-NEXT: andq $1, %rcx
+; GISEL-X64-NEXT: orq %rax, %rcx
+; GISEL-X64-NEXT: cvtsi2ss %rcx, %xmm1
+; GISEL-X64-NEXT: addss %xmm1, %xmm1
+; GISEL-X64-NEXT: xorl %eax, %eax
+; GISEL-X64-NEXT: cmpq $0, %rdi
+; GISEL-X64-NEXT: setl %al
+; GISEL-X64-NEXT: andl $1, %eax
+; GISEL-X64-NEXT: movd %xmm1, %eax
+; GISEL-X64-NEXT: movd %xmm0, %ecx
+; GISEL-X64-NEXT: cmovnel %eax, %ecx
+; GISEL-X64-NEXT: movd %ecx, %xmm0
+; GISEL-X64-NEXT: retq
+;
+; AVX512-LABEL: test_ui64_to_float:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = uitofp i64 %x to float
+ ret float %conv
+}
+
+define float @test_ui32_to_float(i32 %x) {
+; X64-LABEL: test_ui32_to_float:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_ui32_to_float:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = uitofp i32 %x to float
+ ret float %conv
+}
+
+define float @test_ui16_to_float(i16 zeroext %x) {
+; X64-LABEL: test_ui16_to_float:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_ui16_to_float:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_ui16_to_float:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = uitofp i16 %x to float
+ ret float %conv
+}
+
+define float @test_ui8_to_float(i8 zeroext %x) {
+; X64-LABEL: test_ui8_to_float:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_ui8_to_float:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_ui8_to_float:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = uitofp i8 %x to float
+ ret float %conv
+}
+
+define double @test_si64_to_double(i64 %x) {
+; X64-LABEL: test_si64_to_double:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2sd %rdi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si64_to_double:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i64 %x to double
+ ret double %conv
+}
+
+define double @test_si32_to_double(i32 %x) {
+; X64-LABEL: test_si32_to_double:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2sd %edi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si32_to_double:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i32 %x to double
+ ret double %conv
+}
+
+define double @test_si16_to_double(i16 signext %x) {
+; X64-LABEL: test_si16_to_double:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2sd %edi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si16_to_double:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i16 %x to double
+ ret double %conv
+}
+
+define double @test_si8_to_double(i8 signext %x) {
+; X64-LABEL: test_si8_to_double:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2sd %edi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si8_to_double:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i8 %x to double
+ ret double %conv
+}
+
+define float @test_si64_to_float(i64 %x) {
+; X64-LABEL: test_si64_to_float:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2ss %rdi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si64_to_float:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i64 %x to float
+ ret float %conv
+}
+
+define float @test_si32_to_float(i32 %x) {
+; X64-LABEL: test_si32_to_float:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si32_to_float:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i32 %x to float
+ ret float %conv
+}
+
+define float @test_si16_to_float(i16 signext %x) {
+; X64-LABEL: test_si16_to_float:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si16_to_float:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i16 %x to float
+ ret float %conv
+}
+
+define float @test_si8_to_float(i8 signext %x) {
+; X64-LABEL: test_si8_to_float:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si8_to_float:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i8 %x to float
+ ret float %conv
+}
+
>From 20e9b7bcf740a8603bd7952ae9175decfde383d5 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Tue, 23 Jul 2024 20:20:53 -0700
Subject: [PATCH 2/5] Make UITOFP legalization generic. Require 64bits for
uint32 conversion
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 73 ++++++++++++++
.../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 99 ++++++-------------
3 files changed, 105 insertions(+), 69 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index b17bc9aa2a44e5..c6dfb474d0855a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -391,6 +391,8 @@ class LegalizerHelper {
LegalizeResult lowerRotate(MachineInstr &MI);
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
+ LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI);
+ LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI);
LegalizeResult lowerUITOFP(MachineInstr &MI);
LegalizeResult lowerSITOFP(MachineInstr &MI);
LegalizeResult lowerFPTOUI(MachineInstr &MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 640a425ffa7357..d818e0c1bd6c70 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -6931,6 +6931,77 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
return Legalized;
}
+// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
+// operations and G_SITOFP
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
+
+ // For i64 < INT_MAX we simply reuse SITOFP.
+ // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
+ // saved before division, convert to float by SITOFP, multiply the result
+ // by 2.
+ auto One = MIRBuilder.buildConstant(S64, 1);
+ auto Zero = MIRBuilder.buildConstant(S64, 0);
+ // Result if Src < INT_MAX
+ auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
+ // Result if Src >= INT_MAX
+ auto Halved = MIRBuilder.buildLShr(S64, Src, One);
+ auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
+ auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
+ auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
+ auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
+ // Choose
+ auto IsLarge =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
+ MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
+// IEEE double representation.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+
+ assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
+
+ // We create double value from 32 bit parts with 32 exponent difference.
+ // Note that + and - are float operations that adjust the implicit leading
+ // one, the bases 2^52 and 2^84 are for illustrative purposes.
+ //
+ // X = 2^52 * 1.0...LowBits
+ // Y = 2^84 * 1.0...HighBits
+ // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
+ // = - 2^52 * 1.0...HighBits
+ // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
+ auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
+ auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
+ auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
+ auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
+ auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
+
+ auto LowBits = MIRBuilder.buildTrunc(S32, Src);
+ LowBits = MIRBuilder.buildZExt(S64, LowBits);
+ auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
+ auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
+ auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
+ auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
+ MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
@@ -6951,6 +7022,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
// has sitofp, does not have CTLZ, or can efficiently use f64 as an
// intermediate type, this is probably worse.
return lowerU64ToF32BitOps(MI);
+ } else if (DstTy == LLT::scalar(64)) {
+ return lowerU64ToF64BitFloatOps(MI);
}
return UnableToLegalize;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 0f1d3343630d76..0f8da9726bc194 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -509,39 +509,35 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
typeInSet(1, {s32, s64})(Query);
})
+ .clampScalar(0, s32, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(0)
.customIf([=](const LegalityQuery &Query) {
if (HasAVX512)
return false;
- return (HasSSE1 &&
- (typePairInSet(0, 1, {{s32, s32}, {s32, s16}})(Query) ||
- (Is64Bit && typePairInSet(0, 1, {{s32, s64}})(Query)))) ||
- (HasSSE2 &&
- (typePairInSet(0, 1, {{s64, s32}, {s64, s16}})(Query) ||
- (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
+ return ((HasSSE1 && typeIs(0, s32)(Query)) ||
+ (HasSSE2 && typeIs(0, s64)(Query))) &&
+ (scalarNarrowerThan(1, 32)(Query) ||
+ (Is64Bit && typeInSet(1, {s32, s64})(Query)));
})
- .clampScalar(1, HasAVX512 ? s32 : s16, sMaxScalar)
- .widenScalarToNextPow2(1)
- .clampScalar(0, s32, HasSSE2 ? s64 : s32)
- .widenScalarToNextPow2(0);
+ .clampScalar(1, s32, sMaxScalar)
+ .widenScalarToNextPow2(1);
getActionDefinitionsBuilder(G_FPTOUI)
.legalIf([=](const LegalityQuery &Query) {
return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
typeInSet(1, {s32, s64})(Query);
})
+ .clampScalar(1, s32, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(1)
.customIf([=](const LegalityQuery &Query) {
if (HasAVX512)
return false;
- return (HasSSE1 &&
- (typePairInSet(0, 1, {{s32, s32}, {s16, s32}})(Query) ||
- (Is64Bit && typePairInSet(0, 1, {{s64, s32}})(Query)))) ||
- (HasSSE2 &&
- (typePairInSet(0, 1, {{s32, s64}, {s16, s64}})(Query) ||
- (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
+ return ((HasSSE1 && typeIs(1, s32)(Query)) ||
+ (HasSSE2 && typeIs(1, s64)(Query))) &&
+ (scalarNarrowerThan(0, 32)(Query) ||
+ (Is64Bit && typeInSet(0, {s32, s64})(Query)));
})
- .clampScalar(1, s32, sMaxScalar)
- .widenScalarToNextPow2(1)
- .clampScalar(0, HasAVX512 ? s32 : s16, HasSSE2 ? s64 : s32)
+ .clampScalar(0, s32, sMaxScalar)
.widenScalarToNextPow2(0);
// vector ops
@@ -705,12 +701,13 @@ bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
const LLT s64 = LLT::scalar(64);
// Simply reuse FPTOSI when it is possible to widen the type
- if (DstSizeInBits == 16 || DstSizeInBits == 32) {
- auto Casted = MIRBuilder.buildFPTOSI(LLT::scalar(DstSizeInBits * 2), Src);
+ if (DstSizeInBits <= 32) {
+ auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src);
MIRBuilder.buildTrunc(Dst, Casted);
MI.eraseFromParent();
return true;
}
+
if (DstTy == s64) {
APInt TwoPExpInt = APInt::getSignMask(DstSizeInBits);
APFloat TwoPExpFP(SrcTy == s32 ? APFloat::IEEEsingle()
@@ -742,62 +739,26 @@ bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
LegalizerHelper &Helper) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
- const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
// Simply reuse SITOFP when it is possible to widen the type
- if (SrcTy == s16 || SrcTy == s32) {
- const LLT WidenTy = LLT::scalar(SrcTy.getScalarSizeInBits() * 2);
- auto Ext = MIRBuilder.buildZExt(WidenTy, Src);
+ if (SrcTy.getSizeInBits() <= 32) {
+ auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src);
MIRBuilder.buildSITOFP(Dst, Ext);
MI.eraseFromParent();
return true;
}
- if (SrcTy == s64 && DstTy == s32) {
- // For i64 < INT_MAX we simply reuse SITOFP.
- // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
- // saved before division, convert to float by SITOFP, multiply the result
- // by 2.
- auto SmallResult = MIRBuilder.buildSITOFP(DstTy, Src);
- auto One = MIRBuilder.buildConstant(SrcTy, 1);
- auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
- auto Halved = MIRBuilder.buildLShr(SrcTy, Src, One);
- auto LowerBit = MIRBuilder.buildAnd(SrcTy, Src, One);
- auto RoundedHalved = MIRBuilder.buildOr(SrcTy, Halved, LowerBit);
- auto HalvedFP = MIRBuilder.buildSITOFP(DstTy, RoundedHalved);
- auto LargeResult = MIRBuilder.buildFAdd(DstTy, HalvedFP, HalvedFP);
- auto IsLarge = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT,
- LLT::scalar(1), Src, Zero);
- MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
- MI.eraseFromParent();
- return true;
- }
- if (SrcTy == s64 && DstTy == s64) {
- // TODO: rewrite on vector shuffles when supported.
- // We create doubles from 32 bit parts with 32 exponent difference.
- //
- // X = 2^52 * 1.0...LowBits
- // Y = 2^84 * 1.0...HighBits
- // Temp = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
- // = - 2^52 * 1.0...HighBits
- // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
- auto TwoP52 = MIRBuilder.buildConstant(s64, UINT64_C(0x4330000000000000));
- auto TwoP84 = MIRBuilder.buildConstant(s64, UINT64_C(0x4530000000000000));
- auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
- auto TwoP52P84FP = MIRBuilder.buildFConstant(s64, TwoP52P84);
- auto HalfWidth = MIRBuilder.buildConstant(s64, 32);
-
- auto LowBits = MIRBuilder.buildTrunc(s32, Src);
- LowBits = MIRBuilder.buildZExt(s64, LowBits);
- auto LowBitsFP = MIRBuilder.buildOr(s64, TwoP52, LowBits);
- auto HighBits = MIRBuilder.buildLShr(s64, Src, HalfWidth);
- auto HighBitsFP = MIRBuilder.buildOr(s64, TwoP84, HighBits);
- auto Scratch = MIRBuilder.buildFSub(s64, HighBitsFP, TwoP52P84FP);
- MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
- MI.eraseFromParent();
- return true;
- }
+
+ if (SrcTy == s64 && DstTy == s32)
+ return Helper.lowerU64ToF32WithSITOFP(MI) !=
+ LegalizerHelper::LegalizeResult::UnableToLegalize;
+
+ if (SrcTy == s64 && DstTy == s64)
+ // TODO: rewrite with vector shuffles when supported.
+ return Helper.lowerU64ToF64BitFloatOps(MI) !=
+ LegalizerHelper::LegalizeResult::UnableToLegalize;
+
return false;
}
>From 7d68049202193a3047ae9cd5816193ab8adaa6ef Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Tue, 30 Jul 2024 15:14:05 -0700
Subject: [PATCH 3/5] Legalize 64-bit conversions using lowerIf.
* New tests for i31 and i33
* Order doesn't matter much since after one action we start over
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 14 ++--
.../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 46 ++++++------
llvm/test/CodeGen/X86/isel-fp-to-int.ll | 60 +++++++++++++++
llvm/test/CodeGen/X86/isel-int-to-fp.ll | 75 +++++++++++++++++++
4 files changed, 163 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d818e0c1bd6c70..990ffad1620ee8 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7016,15 +7016,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
if (SrcTy != LLT::scalar(64))
return UnableToLegalize;
- if (DstTy == LLT::scalar(32)) {
+ if (DstTy == LLT::scalar(32))
// TODO: SelectionDAG has several alternative expansions to port which may
- // be more reasonble depending on the available instructions. If a target
- // has sitofp, does not have CTLZ, or can efficiently use f64 as an
- // intermediate type, this is probably worse.
- return lowerU64ToF32BitOps(MI);
- } else if (DstTy == LLT::scalar(64)) {
+ // be more reasonable depending on the available instructions. We also need
+ // a more advanced mechanism to choose an optimal version depending on
+ // target features such as sitofp or CTLZ availability.
+ return lowerU64ToF32WithSITOFP(MI);
+
+ if (DstTy == LLT::scalar(64))
return lowerU64ToF64BitFloatOps(MI);
- }
return UnableToLegalize;
}
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 0f8da9726bc194..543998158f0063 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -497,10 +497,10 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
.clampScalar(0, s32, sMaxScalar)
.widenScalarToNextPow2(1);
- // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize s16
- // manually. Otherwise, in custom handler there is no way to understand
- // whether s32 is an original type and we need to promote it to s64 or s32 is
- // obtained after widening s16 and we shouldn't widen it to s64.
+ // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types
+ // <= s32 manually. Otherwise, in custom handler there is no way to
+ // understand whether s32 is an original type and we need to promote it to
+ // s64 or s32 is obtained after widening and we shouldn't widen it to s64.
//
// For AVX512 we simply widen types as there is direct mapping from opcodes
// to asm instructions.
@@ -509,16 +509,22 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
typeInSet(1, {s32, s64})(Query);
})
- .clampScalar(0, s32, HasSSE2 ? s64 : s32)
- .widenScalarToNextPow2(0)
+ .lowerIf([=](const LegalityQuery &Query) {
+ // Lower conversions from s64
+ return !HasAVX512 &&
+ ((HasSSE1 && typeIs(0, s32)(Query)) ||
+ (HasSSE2 && typeIs(0, s64)(Query))) &&
+ (Is64Bit && typeIs(1, s64)(Query));
+ })
.customIf([=](const LegalityQuery &Query) {
- if (HasAVX512)
- return false;
- return ((HasSSE1 && typeIs(0, s32)(Query)) ||
+ return !HasAVX512 &&
+ ((HasSSE1 && typeIs(0, s32)(Query)) ||
(HasSSE2 && typeIs(0, s64)(Query))) &&
(scalarNarrowerThan(1, 32)(Query) ||
- (Is64Bit && typeInSet(1, {s32, s64})(Query)));
+ (Is64Bit && typeIs(1, s32)(Query)));
})
+ .clampScalar(0, s32, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(0)
.clampScalar(1, s32, sMaxScalar)
.widenScalarToNextPow2(1);
@@ -527,18 +533,17 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
typeInSet(1, {s32, s64})(Query);
})
- .clampScalar(1, s32, HasSSE2 ? s64 : s32)
- .widenScalarToNextPow2(1)
.customIf([=](const LegalityQuery &Query) {
- if (HasAVX512)
- return false;
- return ((HasSSE1 && typeIs(1, s32)(Query)) ||
+ return !HasAVX512 &&
+ ((HasSSE1 && typeIs(1, s32)(Query)) ||
(HasSSE2 && typeIs(1, s64)(Query))) &&
(scalarNarrowerThan(0, 32)(Query) ||
(Is64Bit && typeInSet(0, {s32, s64})(Query)));
})
.clampScalar(0, s32, sMaxScalar)
- .widenScalarToNextPow2(0);
+ .widenScalarToNextPow2(0)
+ .clampScalar(1, s32, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(1);
// vector ops
getActionDefinitionsBuilder(G_BUILD_VECTOR)
@@ -750,15 +755,6 @@ bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
return true;
}
- if (SrcTy == s64 && DstTy == s32)
- return Helper.lowerU64ToF32WithSITOFP(MI) !=
- LegalizerHelper::LegalizeResult::UnableToLegalize;
-
- if (SrcTy == s64 && DstTy == s64)
- // TODO: rewrite with vector shuffles when supported.
- return Helper.lowerU64ToF64BitFloatOps(MI) !=
- LegalizerHelper::LegalizeResult::UnableToLegalize;
-
return false;
}
diff --git a/llvm/test/CodeGen/X86/isel-fp-to-int.ll b/llvm/test/CodeGen/X86/isel-fp-to-int.ll
index 0dcca65e06d4a1..8586af124fd2fc 100644
--- a/llvm/test/CodeGen/X86/isel-fp-to-int.ll
+++ b/llvm/test/CodeGen/X86/isel-fp-to-int.ll
@@ -258,6 +258,36 @@ entry:
ret i8 %conv
}
+define i31 @test_double_to_si31(double %x) {
+; X64-LABEL: test_double_to_si31:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_si31:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi double %x to i31
+ ret i31 %conv
+}
+
+define i33 @test_double_to_si33(double %x) {
+; X64-LABEL: test_double_to_si33:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %rax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_si33:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2si %xmm0, %rax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi double %x to i33
+ ret i33 %conv
+}
+
define i64 @test_float_to_si64(float %x) {
; X64-LABEL: test_float_to_si64:
; X64: # %bb.0: # %entry
@@ -321,3 +351,33 @@ entry:
%conv = fptosi float %x to i8
ret i8 %conv
}
+
+define i31 @test_float_to_si31(float %x) {
+; X64-LABEL: test_float_to_si31:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_si31:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi float %x to i31
+ ret i31 %conv
+}
+
+define i33 @test_float_to_si33(float %x) {
+; X64-LABEL: test_float_to_si33:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %rax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_si33:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2si %xmm0, %rax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi float %x to i33
+ ret i33 %conv
+}
diff --git a/llvm/test/CodeGen/X86/isel-int-to-fp.ll b/llvm/test/CodeGen/X86/isel-int-to-fp.ll
index 2f2f0f03719d46..fc99ff95788f3c 100644
--- a/llvm/test/CodeGen/X86/isel-int-to-fp.ll
+++ b/llvm/test/CodeGen/X86/isel-int-to-fp.ll
@@ -258,6 +258,44 @@ entry:
ret double %conv
}
+define double @test_si31_to_double(i31 %x) {
+; X64-LABEL: test_si31_to_double:
+; X64: # %bb.0: # %entry
+; X64-NEXT: addl %edi, %edi
+; X64-NEXT: sarl %edi
+; X64-NEXT: cvtsi2sd %edi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si31_to_double:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: addl %edi, %edi
+; AVX512-NEXT: sarl %edi
+; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i31 %x to double
+ ret double %conv
+}
+
+define double @test_si33_to_double(i33 %x) {
+; X64-LABEL: test_si33_to_double:
+; X64: # %bb.0: # %entry
+; X64-NEXT: shlq $31, %rdi
+; X64-NEXT: sarq $31, %rdi
+; X64-NEXT: cvtsi2sd %rdi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si33_to_double:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: shlq $31, %rdi
+; AVX512-NEXT: sarq $31, %rdi
+; AVX512-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i33 %x to double
+ ret double %conv
+}
+
define float @test_si64_to_float(i64 %x) {
; X64-LABEL: test_si64_to_float:
; X64: # %bb.0: # %entry
@@ -318,3 +356,40 @@ entry:
ret float %conv
}
+define float @test_si31_to_float(i31 %x) {
+; X64-LABEL: test_si31_to_float:
+; X64: # %bb.0: # %entry
+; X64-NEXT: addl %edi, %edi
+; X64-NEXT: sarl %edi
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si31_to_float:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: addl %edi, %edi
+; AVX512-NEXT: sarl %edi
+; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i31 %x to float
+ ret float %conv
+}
+
+define float @test_si33_to_float(i33 %x) {
+; X64-LABEL: test_si33_to_float:
+; X64: # %bb.0: # %entry
+; X64-NEXT: shlq $31, %rdi
+; X64-NEXT: sarq $31, %rdi
+; X64-NEXT: cvtsi2ss %rdi, %xmm0
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_si33_to_float:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: shlq $31, %rdi
+; AVX512-NEXT: sarq $31, %rdi
+; AVX512-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %conv = sitofp i33 %x to float
+ ret float %conv
+}
>From 7e2987d4ac38532c8319ed8bc516898ed7e29b85 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Fri, 6 Sep 2024 07:30:07 -0700
Subject: [PATCH 4/5] Don't use generic instructions for X86 specifics
---
.../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 35 ++++++-------------
llvm/test/CodeGen/X86/isel-fp-to-int.ll | 32 ++++++++++-------
2 files changed, 31 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 24d84c3407574a..f5c56c310d2c0c 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -539,7 +539,17 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
((HasSSE1 && typeIs(1, s32)(Query)) ||
(HasSSE2 && typeIs(1, s64)(Query))) &&
(scalarNarrowerThan(0, 32)(Query) ||
- (Is64Bit && typeInSet(0, {s32, s64})(Query)));
+ (Is64Bit && typeIs(0, s32)(Query)));
+ })
+ // TODO: replace with customized legalization using
+ // specifics of cvttsd2si. The selection of this node requires
+ // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
+ // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
+ .lowerIf([=](const LegalityQuery &Query) {
+ return !HasAVX512 &&
+ ((HasSSE1 && typeIs(1, s32)(Query)) ||
+ (HasSSE2 && typeIs(1, s64)(Query))) &&
+ (Is64Bit && typeIs(0, s64)(Query));
})
.clampScalar(0, s32, sMaxScalar)
.widenScalarToNextPow2(0)
@@ -714,29 +724,6 @@ bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
return true;
}
- if (DstTy == s64) {
- APInt TwoPExpInt = APInt::getSignMask(DstSizeInBits);
- APFloat TwoPExpFP(SrcTy == s32 ? APFloat::IEEEsingle()
- : APFloat::IEEEdouble(),
- APInt::getZero(SrcTy.getSizeInBits()));
- TwoPExpFP.convertFromAPInt(TwoPExpInt, /*IsSigned=*/false,
- APFloat::rmNearestTiesToEven);
-
- // For fp Src greater or equal to Threshold(2^Exp), we use FPTOSI on
- // (Src - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
- // For fp Src smaller, (Src - 2^Exp) is zeroed by And, the final result
- // is FPTOSI on Src.
- auto Casted = MIRBuilder.buildFPTOSI(DstTy, Src);
- auto Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
- auto FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
- auto ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
- auto Shift = MIRBuilder.buildConstant(DstTy, DstSizeInBits - 1);
- auto ResHighBit = MIRBuilder.buildAShr(DstTy, Casted, Shift);
- auto And = MIRBuilder.buildAnd(DstTy, ResHighBit, ResLowBits);
- MIRBuilder.buildOr(Dst, And, Casted);
- MI.eraseFromParent();
- return true;
- }
return false;
}
diff --git a/llvm/test/CodeGen/X86/isel-fp-to-int.ll b/llvm/test/CodeGen/X86/isel-fp-to-int.ll
index 8586af124fd2fc..fae3db6ad0afa0 100644
--- a/llvm/test/CodeGen/X86/isel-fp-to-int.ll
+++ b/llvm/test/CodeGen/X86/isel-fp-to-int.ll
@@ -20,12 +20,16 @@ define i64 @test_double_to_ui64(double %x) {
; GISEL-X64: # %bb.0: # %entry
; GISEL-X64-NEXT: cvttsd2si %xmm0, %rcx
; GISEL-X64-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
-; GISEL-X64-NEXT: subsd %xmm1, %xmm0
-; GISEL-X64-NEXT: cvttsd2si %xmm0, %rdx
-; GISEL-X64-NEXT: movq %rcx, %rax
-; GISEL-X64-NEXT: sarq $63, %rax
-; GISEL-X64-NEXT: andq %rdx, %rax
-; GISEL-X64-NEXT: orq %rcx, %rax
+; GISEL-X64-NEXT: movapd %xmm0, %xmm2
+; GISEL-X64-NEXT: subsd %xmm1, %xmm2
+; GISEL-X64-NEXT: cvttsd2si %xmm2, %rdx
+; GISEL-X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; GISEL-X64-NEXT: xorq %rdx, %rax
+; GISEL-X64-NEXT: xorl %edx, %edx
+; GISEL-X64-NEXT: ucomisd %xmm1, %xmm0
+; GISEL-X64-NEXT: setb %dl
+; GISEL-X64-NEXT: andl $1, %edx
+; GISEL-X64-NEXT: cmovneq %rcx, %rax
; GISEL-X64-NEXT: retq
;
; AVX512-LABEL: test_double_to_ui64:
@@ -115,12 +119,16 @@ define i64 @test_float_to_ui64(float %x) {
; GISEL-X64: # %bb.0: # %entry
; GISEL-X64-NEXT: cvttss2si %xmm0, %rcx
; GISEL-X64-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
-; GISEL-X64-NEXT: subss %xmm1, %xmm0
-; GISEL-X64-NEXT: cvttss2si %xmm0, %rdx
-; GISEL-X64-NEXT: movq %rcx, %rax
-; GISEL-X64-NEXT: sarq $63, %rax
-; GISEL-X64-NEXT: andq %rdx, %rax
-; GISEL-X64-NEXT: orq %rcx, %rax
+; GISEL-X64-NEXT: movaps %xmm0, %xmm2
+; GISEL-X64-NEXT: subss %xmm1, %xmm2
+; GISEL-X64-NEXT: cvttss2si %xmm2, %rdx
+; GISEL-X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; GISEL-X64-NEXT: xorq %rdx, %rax
+; GISEL-X64-NEXT: xorl %edx, %edx
+; GISEL-X64-NEXT: ucomiss %xmm1, %xmm0
+; GISEL-X64-NEXT: setb %dl
+; GISEL-X64-NEXT: andl $1, %edx
+; GISEL-X64-NEXT: cmovneq %rcx, %rax
; GISEL-X64-NEXT: retq
;
; AVX512-LABEL: test_float_to_ui64:
>From bf232e7f7ff7cd55f67b156ada21fcefd26d5f90 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Tue, 10 Sep 2024 08:11:10 -0700
Subject: [PATCH 5/5] Addressing the comments
---
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 3 ++-
llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 12 ++++++------
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 596ded12582d4f..280615047939eb 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7133,7 +7133,8 @@ LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
- // Choose
+ // Check if the original value is larger than INT_MAX by comparing with
+ // zero to pick one of the two conversions.
auto IsLarge =
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index f5c56c310d2c0c..b203b57f0b683a 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -510,19 +510,19 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
typeInSet(1, {s32, s64})(Query);
})
- .lowerIf([=](const LegalityQuery &Query) {
- // Lower conversions from s64
+ .customIf([=](const LegalityQuery &Query) {
return !HasAVX512 &&
((HasSSE1 && typeIs(0, s32)(Query)) ||
(HasSSE2 && typeIs(0, s64)(Query))) &&
- (Is64Bit && typeIs(1, s64)(Query));
+ (scalarNarrowerThan(1, 32)(Query) ||
+ (Is64Bit && typeIs(1, s32)(Query)));
})
- .customIf([=](const LegalityQuery &Query) {
+ .lowerIf([=](const LegalityQuery &Query) {
+ // Lower conversions from s64
return !HasAVX512 &&
((HasSSE1 && typeIs(0, s32)(Query)) ||
(HasSSE2 && typeIs(0, s64)(Query))) &&
- (scalarNarrowerThan(1, 32)(Query) ||
- (Is64Bit && typeIs(1, s32)(Query)));
+ (Is64Bit && typeIs(1, s64)(Query));
})
.clampScalar(0, s32, HasSSE2 ? s64 : s32)
.widenScalarToNextPow2(0)
More information about the llvm-commits
mailing list