[llvm] [X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (PR #100079)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 23 01:26:03 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Evgenii Kudriashov (e-kud)

<details>
<summary>Changes</summary>

Also add tests for G_SITOFP and G_FPTOSI

---

Patch is 30.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100079.diff


5 Files Affected:

- (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp (+157) 
- (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.h (+6) 
- (modified) llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp (+6-4) 
- (added) llvm/test/CodeGen/X86/isel-fp-to-int.ll (+323) 
- (added) llvm/test/CodeGen/X86/isel-int-to-fp.ll (+320) 


``````````diff
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index dd8ecf6ef7fc7..0f1d3343630d7 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -497,6 +497,53 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
       .clampScalar(0, s32, sMaxScalar)
       .widenScalarToNextPow2(1);
 
+  // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize s16
+  // manually. Otherwise, in the custom handler there is no way to tell whether
+  // s32 is the original type (and needs to be promoted to s64) or was obtained
+  // by widening s16 (and should not be widened to s64).
+  //
+  // For AVX512 we simply widen types as there is direct mapping from opcodes
+  // to asm instructions.
+  getActionDefinitionsBuilder(G_UITOFP)
+      .legalIf([=](const LegalityQuery &Query) {
+        return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
+               typeInSet(1, {s32, s64})(Query);
+      })
+      .customIf([=](const LegalityQuery &Query) {
+        if (HasAVX512)
+          return false;
+        return (HasSSE1 &&
+                (typePairInSet(0, 1, {{s32, s32}, {s32, s16}})(Query) ||
+                 (Is64Bit && typePairInSet(0, 1, {{s32, s64}})(Query)))) ||
+               (HasSSE2 &&
+                (typePairInSet(0, 1, {{s64, s32}, {s64, s16}})(Query) ||
+                 (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
+      })
+      .clampScalar(1, HasAVX512 ? s32 : s16, sMaxScalar)
+      .widenScalarToNextPow2(1)
+      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
+      .widenScalarToNextPow2(0);
+
+  getActionDefinitionsBuilder(G_FPTOUI)
+      .legalIf([=](const LegalityQuery &Query) {
+        return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
+               typeInSet(1, {s32, s64})(Query);
+      })
+      .customIf([=](const LegalityQuery &Query) {
+        if (HasAVX512)
+          return false;
+        return (HasSSE1 &&
+                (typePairInSet(0, 1, {{s32, s32}, {s16, s32}})(Query) ||
+                 (Is64Bit && typePairInSet(0, 1, {{s64, s32}})(Query)))) ||
+               (HasSSE2 &&
+                (typePairInSet(0, 1, {{s32, s64}, {s16, s64}})(Query) ||
+                 (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
+      })
+      .clampScalar(1, s32, sMaxScalar)
+      .widenScalarToNextPow2(1)
+      .clampScalar(0, HasAVX512 ? s32 : s16, HasSSE2 ? s64 : s32)
+      .widenScalarToNextPow2(0);
+
   // vector ops
   getActionDefinitionsBuilder(G_BUILD_VECTOR)
       .customIf([=](const LegalityQuery &Query) {
@@ -589,6 +636,10 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
     return false;
   case TargetOpcode::G_BUILD_VECTOR:
     return legalizeBuildVector(MI, MRI, Helper);
+  case TargetOpcode::G_FPTOUI:
+    return legalizeFPTOUI(MI, MRI, Helper);
+  case TargetOpcode::G_UITOFP:
+    return legalizeUITOFP(MI, MRI, Helper);
   }
   llvm_unreachable("expected switch to return");
 }
@@ -644,6 +695,112 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
   return true;
 }
 
+bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+  unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
+  const LLT s32 = LLT::scalar(32);
+  const LLT s64 = LLT::scalar(64);
+
+  // Simply reuse FPTOSI when it is possible to widen the type
+  if (DstSizeInBits == 16 || DstSizeInBits == 32) {
+    auto Casted = MIRBuilder.buildFPTOSI(LLT::scalar(DstSizeInBits * 2), Src);
+    MIRBuilder.buildTrunc(Dst, Casted);
+    MI.eraseFromParent();
+    return true;
+  }
+  if (DstTy == s64) {
+    APInt TwoPExpInt = APInt::getSignMask(DstSizeInBits);
+    APFloat TwoPExpFP(SrcTy == s32 ? APFloat::IEEEsingle()
+                                   : APFloat::IEEEdouble(),
+                      APInt::getZero(SrcTy.getSizeInBits()));
+    TwoPExpFP.convertFromAPInt(TwoPExpInt, /*IsSigned=*/false,
+                               APFloat::rmNearestTiesToEven);
+
+    // For fp Src greater or equal to Threshold(2^Exp), we use FPTOSI on
+    // (Src - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
+    // For fp Src smaller, (Src - 2^Exp) is zeroed by And, the final result
+    // is FPTOSI on Src.
+    auto Casted = MIRBuilder.buildFPTOSI(DstTy, Src);
+    auto Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+    auto FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+    auto ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
+    auto Shift = MIRBuilder.buildConstant(DstTy, DstSizeInBits - 1);
+    auto ResHighBit = MIRBuilder.buildAShr(DstTy, Casted, Shift);
+    auto And = MIRBuilder.buildAnd(DstTy, ResHighBit, ResLowBits);
+    MIRBuilder.buildOr(Dst, And, Casted);
+    MI.eraseFromParent();
+    return true;
+  }
+  return false;
+}
+
+bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+  const LLT s16 = LLT::scalar(16);
+  const LLT s32 = LLT::scalar(32);
+  const LLT s64 = LLT::scalar(64);
+
+  // Simply reuse SITOFP when it is possible to widen the type
+  if (SrcTy == s16 || SrcTy == s32) {
+    const LLT WidenTy = LLT::scalar(SrcTy.getScalarSizeInBits() * 2);
+    auto Ext = MIRBuilder.buildZExt(WidenTy, Src);
+    MIRBuilder.buildSITOFP(Dst, Ext);
+    MI.eraseFromParent();
+    return true;
+  }
+  if (SrcTy == s64 && DstTy == s32) {
+    // For i64 < INT_MAX we simply reuse SITOFP.
+    // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
+    // saved before division, convert to float by SITOFP, multiply the result
+    // by 2.
+    auto SmallResult = MIRBuilder.buildSITOFP(DstTy, Src);
+    auto One = MIRBuilder.buildConstant(SrcTy, 1);
+    auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
+    auto Halved = MIRBuilder.buildLShr(SrcTy, Src, One);
+    auto LowerBit = MIRBuilder.buildAnd(SrcTy, Src, One);
+    auto RoundedHalved = MIRBuilder.buildOr(SrcTy, Halved, LowerBit);
+    auto HalvedFP = MIRBuilder.buildSITOFP(DstTy, RoundedHalved);
+    auto LargeResult = MIRBuilder.buildFAdd(DstTy, HalvedFP, HalvedFP);
+    auto IsLarge = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT,
+                                        LLT::scalar(1), Src, Zero);
+    MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
+    MI.eraseFromParent();
+    return true;
+  }
+  if (SrcTy == s64 && DstTy == s64) {
+    // TODO: rewrite on vector shuffles when supported.
+    // We create doubles from 32 bit parts with 32 exponent difference.
+    //
+    // X = 2^52 * 1.0...LowBits
+    // Y = 2^84 * 1.0...HighBits
+    // Temp = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
+    //      = - 2^52 * 1.0...HighBits
+    // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
+    auto TwoP52 = MIRBuilder.buildConstant(s64, UINT64_C(0x4330000000000000));
+    auto TwoP84 = MIRBuilder.buildConstant(s64, UINT64_C(0x4530000000000000));
+    auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
+    auto TwoP52P84FP = MIRBuilder.buildFConstant(s64, TwoP52P84);
+    auto HalfWidth = MIRBuilder.buildConstant(s64, 32);
+
+    auto LowBits = MIRBuilder.buildTrunc(s32, Src);
+    LowBits = MIRBuilder.buildZExt(s64, LowBits);
+    auto LowBitsFP = MIRBuilder.buildOr(s64, TwoP52, LowBits);
+    auto HighBits = MIRBuilder.buildLShr(s64, Src, HalfWidth);
+    auto HighBitsFP = MIRBuilder.buildOr(s64, TwoP84, HighBits);
+    auto Scratch = MIRBuilder.buildFSub(s64, HighBitsFP, TwoP52P84FP);
+    MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
+    MI.eraseFromParent();
+    return true;
+  }
+  return false;
+}
+
 bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                          MachineInstr &MI) const {
   return true;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 229a58986903d..39bd9892e2f16 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -39,6 +39,12 @@ class X86LegalizerInfo : public LegalizerInfo {
 private:
   bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
                            LegalizerHelper &Helper) const;
+
+  bool legalizeFPTOUI(MachineInstr &MI, MachineRegisterInfo &MRI,
+                      LegalizerHelper &Helper) const;
+
+  bool legalizeUITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
+                      LegalizerHelper &Helper) const;
 };
 } // namespace llvm
 #endif
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
index 9e85424e76e62..43d52fc0e5c3b 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
@@ -323,7 +323,9 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx);
     break;
   case TargetOpcode::G_SITOFP:
-  case TargetOpcode::G_FPTOSI: {
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_UITOFP:
+  case TargetOpcode::G_FPTOUI: {
     // Some of the floating-point instructions have mixed GPR and FP
     // operands: fine-tune the computed mapping.
     auto &Op0 = MI.getOperand(0);
@@ -331,10 +333,10 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     const LLT Ty0 = MRI.getType(Op0.getReg());
     const LLT Ty1 = MRI.getType(Op1.getReg());
 
-    bool FirstArgIsFP = Opc == TargetOpcode::G_SITOFP;
-    bool SecondArgIsFP = Opc == TargetOpcode::G_FPTOSI;
+    bool FirstArgIsFP =
+        Opc == TargetOpcode::G_SITOFP || Opc == TargetOpcode::G_UITOFP;
     OpRegBankIdx[0] = getPartialMappingIdx(MI, Ty0, /* isFP= */ FirstArgIsFP);
-    OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ SecondArgIsFP);
+    OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ !FirstArgIsFP);
     break;
   }
   case TargetOpcode::G_FCMP: {
diff --git a/llvm/test/CodeGen/X86/isel-fp-to-int.ll b/llvm/test/CodeGen/X86/isel-fp-to-int.ll
new file mode 100644
index 0000000000000..0dcca65e06d4a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fp-to-int.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s                                   -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,SDAG-X64
+; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,GISEL-X64
+; RUN: llc < %s                                   -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,SDAG-AVX512
+; RUN: llc < %s -global-isel -global-isel-abort=1 -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,GISEL-AVX512
+
+define i64 @test_double_to_ui64(double %x) {
+; SDAG-X64-LABEL: test_double_to_ui64:
+; SDAG-X64:       # %bb.0: # %entry
+; SDAG-X64-NEXT:    cvttsd2si %xmm0, %rcx
+; SDAG-X64-NEXT:    movq %rcx, %rdx
+; SDAG-X64-NEXT:    sarq $63, %rdx
+; SDAG-X64-NEXT:    subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SDAG-X64-NEXT:    cvttsd2si %xmm0, %rax
+; SDAG-X64-NEXT:    andq %rdx, %rax
+; SDAG-X64-NEXT:    orq %rcx, %rax
+; SDAG-X64-NEXT:    retq
+;
+; GISEL-X64-LABEL: test_double_to_ui64:
+; GISEL-X64:       # %bb.0: # %entry
+; GISEL-X64-NEXT:    cvttsd2si %xmm0, %rcx
+; GISEL-X64-NEXT:    movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
+; GISEL-X64-NEXT:    subsd %xmm1, %xmm0
+; GISEL-X64-NEXT:    cvttsd2si %xmm0, %rdx
+; GISEL-X64-NEXT:    movq %rcx, %rax
+; GISEL-X64-NEXT:    sarq $63, %rax
+; GISEL-X64-NEXT:    andq %rdx, %rax
+; GISEL-X64-NEXT:    orq %rcx, %rax
+; GISEL-X64-NEXT:    retq
+;
+; AVX512-LABEL: test_double_to_ui64:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttsd2usi %xmm0, %rax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptoui double %x to i64
+  ret i64 %conv
+}
+
+define i32 @test_double_to_ui32(double %x) {
+; X64-LABEL: test_double_to_ui32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttsd2si %xmm0, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+;
+; AVX512-LABEL: test_double_to_ui32:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttsd2usi %xmm0, %eax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptoui double %x to i32
+  ret i32 %conv
+}
+
+define zeroext i16 @test_double_to_ui16(double %x) {
+; X64-LABEL: test_double_to_ui16:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttsd2si %xmm0, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+;
+; SDAG-AVX512-LABEL: test_double_to_ui16:
+; SDAG-AVX512:       # %bb.0: # %entry
+; SDAG-AVX512-NEXT:    vcvttsd2si %xmm0, %eax
+; SDAG-AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
+; SDAG-AVX512-NEXT:    retq
+;
+; GISEL-AVX512-LABEL: test_double_to_ui16:
+; GISEL-AVX512:       # %bb.0: # %entry
+; GISEL-AVX512-NEXT:    vcvttsd2usi %xmm0, %eax
+; GISEL-AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
+; GISEL-AVX512-NEXT:    retq
+entry:
+  %conv = fptoui double %x to i16
+  ret i16 %conv
+}
+
+define zeroext i8 @test_double_to_ui8(double %x) {
+; X64-LABEL: test_double_to_ui8:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttsd2si %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+;
+; SDAG-AVX512-LABEL: test_double_to_ui8:
+; SDAG-AVX512:       # %bb.0: # %entry
+; SDAG-AVX512-NEXT:    vcvttsd2si %xmm0, %eax
+; SDAG-AVX512-NEXT:    # kill: def $al killed $al killed $eax
+; SDAG-AVX512-NEXT:    retq
+;
+; GISEL-AVX512-LABEL: test_double_to_ui8:
+; GISEL-AVX512:       # %bb.0: # %entry
+; GISEL-AVX512-NEXT:    vcvttsd2usi %xmm0, %eax
+; GISEL-AVX512-NEXT:    # kill: def $al killed $al killed $eax
+; GISEL-AVX512-NEXT:    retq
+entry:
+  %conv = fptoui double %x to i8
+  ret i8 %conv
+}
+
+define i64 @test_float_to_ui64(float %x) {
+; SDAG-X64-LABEL: test_float_to_ui64:
+; SDAG-X64:       # %bb.0: # %entry
+; SDAG-X64-NEXT:    cvttss2si %xmm0, %rcx
+; SDAG-X64-NEXT:    movq %rcx, %rdx
+; SDAG-X64-NEXT:    sarq $63, %rdx
+; SDAG-X64-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SDAG-X64-NEXT:    cvttss2si %xmm0, %rax
+; SDAG-X64-NEXT:    andq %rdx, %rax
+; SDAG-X64-NEXT:    orq %rcx, %rax
+; SDAG-X64-NEXT:    retq
+;
+; GISEL-X64-LABEL: test_float_to_ui64:
+; GISEL-X64:       # %bb.0: # %entry
+; GISEL-X64-NEXT:    cvttss2si %xmm0, %rcx
+; GISEL-X64-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
+; GISEL-X64-NEXT:    subss %xmm1, %xmm0
+; GISEL-X64-NEXT:    cvttss2si %xmm0, %rdx
+; GISEL-X64-NEXT:    movq %rcx, %rax
+; GISEL-X64-NEXT:    sarq $63, %rax
+; GISEL-X64-NEXT:    andq %rdx, %rax
+; GISEL-X64-NEXT:    orq %rcx, %rax
+; GISEL-X64-NEXT:    retq
+;
+; AVX512-LABEL: test_float_to_ui64:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttss2usi %xmm0, %rax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptoui float %x to i64
+  ret i64 %conv
+}
+
+define i32 @test_float_to_ui32(float %x) {
+; X64-LABEL: test_float_to_ui32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttss2si %xmm0, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+;
+; AVX512-LABEL: test_float_to_ui32:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttss2usi %xmm0, %eax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptoui float %x to i32
+  ret i32 %conv
+}
+
+define zeroext i16 @test_float_to_ui16(float %x) {
+; X64-LABEL: test_float_to_ui16:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttss2si %xmm0, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+;
+; SDAG-AVX512-LABEL: test_float_to_ui16:
+; SDAG-AVX512:       # %bb.0: # %entry
+; SDAG-AVX512-NEXT:    vcvttss2si %xmm0, %eax
+; SDAG-AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
+; SDAG-AVX512-NEXT:    retq
+;
+; GISEL-AVX512-LABEL: test_float_to_ui16:
+; GISEL-AVX512:       # %bb.0: # %entry
+; GISEL-AVX512-NEXT:    vcvttss2usi %xmm0, %eax
+; GISEL-AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
+; GISEL-AVX512-NEXT:    retq
+entry:
+  %conv = fptoui float %x to i16
+  ret i16 %conv
+}
+
+define zeroext i8 @test_float_to_ui8(float %x) {
+; X64-LABEL: test_float_to_ui8:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttss2si %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+;
+; SDAG-AVX512-LABEL: test_float_to_ui8:
+; SDAG-AVX512:       # %bb.0: # %entry
+; SDAG-AVX512-NEXT:    vcvttss2si %xmm0, %eax
+; SDAG-AVX512-NEXT:    # kill: def $al killed $al killed $eax
+; SDAG-AVX512-NEXT:    retq
+;
+; GISEL-AVX512-LABEL: test_float_to_ui8:
+; GISEL-AVX512:       # %bb.0: # %entry
+; GISEL-AVX512-NEXT:    vcvttss2usi %xmm0, %eax
+; GISEL-AVX512-NEXT:    # kill: def $al killed $al killed $eax
+; GISEL-AVX512-NEXT:    retq
+entry:
+  %conv = fptoui float %x to i8
+  ret i8 %conv
+}
+
+define i64 @test_double_to_si64(double %x) {
+; X64-LABEL: test_double_to_si64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttsd2si %xmm0, %rax
+; X64-NEXT:    retq
+;
+; AVX512-LABEL: test_double_to_si64:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttsd2si %xmm0, %rax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptosi double %x to i64
+  ret i64 %conv
+}
+
+define i32 @test_double_to_si32(double %x) {
+; X64-LABEL: test_double_to_si32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttsd2si %xmm0, %eax
+; X64-NEXT:    retq
+;
+; AVX512-LABEL: test_double_to_si32:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttsd2si %xmm0, %eax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptosi double %x to i32
+  ret i32 %conv
+}
+
+define signext i16 @test_double_to_si16(double %x) {
+; X64-LABEL: test_double_to_si16:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttsd2si %xmm0, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+;
+; AVX512-LABEL: test_double_to_si16:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttsd2si %xmm0, %eax
+; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptosi double %x to i16
+  ret i16 %conv
+}
+
+define signext i8 @test_double_to_si8(double %x) {
+; X64-LABEL: test_double_to_si8:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttsd2si %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+;
+; AVX512-LABEL: test_double_to_si8:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttsd2si %xmm0, %eax
+; AVX512-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptosi double %x to i8
+  ret i8 %conv
+}
+
+define i64 @test_float_to_si64(float %x) {
+; X64-LABEL: test_float_to_si64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttss2si %xmm0, %rax
+; X64-NEXT:    retq
+;
+; AVX512-LABEL: test_float_to_si64:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttss2si %xmm0, %rax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptosi float %x to i64
+  ret i64 %conv
+}
+
+define i32 @test_float_to_si32(float %x) {
+; X64-LABEL: test_float_to_si32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttss2si %xmm0, %eax
+; X64-NEXT:    retq
+;
+; AVX512-LABEL: test_float_to_si32:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttss2si %xmm0, %eax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptosi float %x to i32
+  ret i32 %conv
+}
+
+define signext i16 @test_float_to_si16(float %x) {
+; X64-LABEL: test_float_to_si16:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvttss2si %xmm0, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+;
+; AVX512-LABEL: test_float_to_si16:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vcvttss2si %xmm0, %eax
+; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT:    retq
+entry:
+  %conv = fptosi float %x to i16
+  ret i16 %co...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/100079


More information about the llvm-commits mailing list