[llvm] [X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (PR #100079)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 01:26:03 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Evgenii Kudriashov (e-kud)
<details>
<summary>Changes</summary>
Also add tests for G_SITOFP and G_FPTOSI
---
Patch is 30.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100079.diff
5 Files Affected:
- (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp (+157)
- (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.h (+6)
- (modified) llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp (+6-4)
- (added) llvm/test/CodeGen/X86/isel-fp-to-int.ll (+323)
- (added) llvm/test/CodeGen/X86/isel-int-to-fp.ll (+320)
``````````diff
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index dd8ecf6ef7fc7..0f1d3343630d7 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -497,6 +497,53 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
.clampScalar(0, s32, sMaxScalar)
.widenScalarToNextPow2(1);
+  // For G_UITOFP and G_FPTOUI without AVX512, we have to legalize s16
+  // manually. Otherwise, in the custom handler there is no way to tell
+  // whether s32 is the original type that must be promoted to s64, or s32 was
+  // obtained by widening s16 and shouldn't be widened further to s64.
+ //
+ // For AVX512 we simply widen types as there is direct mapping from opcodes
+ // to asm instructions.
+ getActionDefinitionsBuilder(G_UITOFP)
+ .legalIf([=](const LegalityQuery &Query) {
+ return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
+ typeInSet(1, {s32, s64})(Query);
+ })
+ .customIf([=](const LegalityQuery &Query) {
+ if (HasAVX512)
+ return false;
+ return (HasSSE1 &&
+ (typePairInSet(0, 1, {{s32, s32}, {s32, s16}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s32, s64}})(Query)))) ||
+ (HasSSE2 &&
+ (typePairInSet(0, 1, {{s64, s32}, {s64, s16}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
+ })
+ .clampScalar(1, HasAVX512 ? s32 : s16, sMaxScalar)
+ .widenScalarToNextPow2(1)
+ .clampScalar(0, s32, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(0);
+
+ getActionDefinitionsBuilder(G_FPTOUI)
+ .legalIf([=](const LegalityQuery &Query) {
+ return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
+ typeInSet(1, {s32, s64})(Query);
+ })
+ .customIf([=](const LegalityQuery &Query) {
+ if (HasAVX512)
+ return false;
+ return (HasSSE1 &&
+ (typePairInSet(0, 1, {{s32, s32}, {s16, s32}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s32}})(Query)))) ||
+ (HasSSE2 &&
+ (typePairInSet(0, 1, {{s32, s64}, {s16, s64}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
+ })
+ .clampScalar(1, s32, sMaxScalar)
+ .widenScalarToNextPow2(1)
+ .clampScalar(0, HasAVX512 ? s32 : s16, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(0);
+
// vector ops
getActionDefinitionsBuilder(G_BUILD_VECTOR)
.customIf([=](const LegalityQuery &Query) {
@@ -589,6 +636,10 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
return false;
case TargetOpcode::G_BUILD_VECTOR:
return legalizeBuildVector(MI, MRI, Helper);
+ case TargetOpcode::G_FPTOUI:
+ return legalizeFPTOUI(MI, MRI, Helper);
+ case TargetOpcode::G_UITOFP:
+ return legalizeUITOFP(MI, MRI, Helper);
}
llvm_unreachable("expected switch to return");
}
@@ -644,6 +695,112 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
return true;
}
+bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+ unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ // Simply reuse FPTOSI when it is possible to widen the type
+ if (DstSizeInBits == 16 || DstSizeInBits == 32) {
+ auto Casted = MIRBuilder.buildFPTOSI(LLT::scalar(DstSizeInBits * 2), Src);
+ MIRBuilder.buildTrunc(Dst, Casted);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (DstTy == s64) {
+ APInt TwoPExpInt = APInt::getSignMask(DstSizeInBits);
+ APFloat TwoPExpFP(SrcTy == s32 ? APFloat::IEEEsingle()
+ : APFloat::IEEEdouble(),
+ APInt::getZero(SrcTy.getSizeInBits()));
+ TwoPExpFP.convertFromAPInt(TwoPExpInt, /*IsSigned=*/false,
+ APFloat::rmNearestTiesToEven);
+
+ // For fp Src greater or equal to Threshold(2^Exp), we use FPTOSI on
+ // (Src - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
+ // For fp Src smaller, (Src - 2^Exp) is zeroed by And, the final result
+ // is FPTOSI on Src.
+ auto Casted = MIRBuilder.buildFPTOSI(DstTy, Src);
+ auto Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+ auto FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+ auto ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
+ auto Shift = MIRBuilder.buildConstant(DstTy, DstSizeInBits - 1);
+ auto ResHighBit = MIRBuilder.buildAShr(DstTy, Casted, Shift);
+ auto And = MIRBuilder.buildAnd(DstTy, ResHighBit, ResLowBits);
+ MIRBuilder.buildOr(Dst, And, Casted);
+ MI.eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ // Simply reuse SITOFP when it is possible to widen the type
+ if (SrcTy == s16 || SrcTy == s32) {
+ const LLT WidenTy = LLT::scalar(SrcTy.getScalarSizeInBits() * 2);
+ auto Ext = MIRBuilder.buildZExt(WidenTy, Src);
+ MIRBuilder.buildSITOFP(Dst, Ext);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (SrcTy == s64 && DstTy == s32) {
+    // For i64 values below 2^63 (sign bit clear) we simply reuse SITOFP.
+ // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
+ // saved before division, convert to float by SITOFP, multiply the result
+ // by 2.
+ auto SmallResult = MIRBuilder.buildSITOFP(DstTy, Src);
+ auto One = MIRBuilder.buildConstant(SrcTy, 1);
+ auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
+ auto Halved = MIRBuilder.buildLShr(SrcTy, Src, One);
+ auto LowerBit = MIRBuilder.buildAnd(SrcTy, Src, One);
+ auto RoundedHalved = MIRBuilder.buildOr(SrcTy, Halved, LowerBit);
+ auto HalvedFP = MIRBuilder.buildSITOFP(DstTy, RoundedHalved);
+ auto LargeResult = MIRBuilder.buildFAdd(DstTy, HalvedFP, HalvedFP);
+ auto IsLarge = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT,
+ LLT::scalar(1), Src, Zero);
+ MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (SrcTy == s64 && DstTy == s64) {
+ // TODO: rewrite on vector shuffles when supported.
+ // We create doubles from 32 bit parts with 32 exponent difference.
+ //
+ // X = 2^52 * 1.0...LowBits
+ // Y = 2^84 * 1.0...HighBits
+ // Temp = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
+ // = - 2^52 * 1.0...HighBits
+ // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
+ auto TwoP52 = MIRBuilder.buildConstant(s64, UINT64_C(0x4330000000000000));
+ auto TwoP84 = MIRBuilder.buildConstant(s64, UINT64_C(0x4530000000000000));
+ auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
+ auto TwoP52P84FP = MIRBuilder.buildFConstant(s64, TwoP52P84);
+ auto HalfWidth = MIRBuilder.buildConstant(s64, 32);
+
+ auto LowBits = MIRBuilder.buildTrunc(s32, Src);
+ LowBits = MIRBuilder.buildZExt(s64, LowBits);
+ auto LowBitsFP = MIRBuilder.buildOr(s64, TwoP52, LowBits);
+ auto HighBits = MIRBuilder.buildLShr(s64, Src, HalfWidth);
+ auto HighBitsFP = MIRBuilder.buildOr(s64, TwoP84, HighBits);
+ auto Scratch = MIRBuilder.buildFSub(s64, HighBitsFP, TwoP52P84FP);
+ MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
+ MI.eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
return true;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 229a58986903d..39bd9892e2f16 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -39,6 +39,12 @@ class X86LegalizerInfo : public LegalizerInfo {
private:
bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
+
+ bool legalizeFPTOUI(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
+
+ bool legalizeUITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
};
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
index 9e85424e76e62..43d52fc0e5c3b 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
@@ -323,7 +323,9 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx);
break;
case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_FPTOSI: {
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_UITOFP:
+ case TargetOpcode::G_FPTOUI: {
// Some of the floating-point instructions have mixed GPR and FP
// operands: fine-tune the computed mapping.
auto &Op0 = MI.getOperand(0);
@@ -331,10 +333,10 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const LLT Ty0 = MRI.getType(Op0.getReg());
const LLT Ty1 = MRI.getType(Op1.getReg());
- bool FirstArgIsFP = Opc == TargetOpcode::G_SITOFP;
- bool SecondArgIsFP = Opc == TargetOpcode::G_FPTOSI;
+ bool FirstArgIsFP =
+ Opc == TargetOpcode::G_SITOFP || Opc == TargetOpcode::G_UITOFP;
OpRegBankIdx[0] = getPartialMappingIdx(MI, Ty0, /* isFP= */ FirstArgIsFP);
- OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ SecondArgIsFP);
+ OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ !FirstArgIsFP);
break;
}
case TargetOpcode::G_FCMP: {
diff --git a/llvm/test/CodeGen/X86/isel-fp-to-int.ll b/llvm/test/CodeGen/X86/isel-fp-to-int.ll
new file mode 100644
index 0000000000000..0dcca65e06d4a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fp-to-int.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,SDAG-X64
+; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,GISEL-X64
+; RUN: llc < %s -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,SDAG-AVX512
+; RUN: llc < %s -global-isel -global-isel-abort=1 -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,GISEL-AVX512
+
+define i64 @test_double_to_ui64(double %x) {
+; SDAG-X64-LABEL: test_double_to_ui64:
+; SDAG-X64: # %bb.0: # %entry
+; SDAG-X64-NEXT: cvttsd2si %xmm0, %rcx
+; SDAG-X64-NEXT: movq %rcx, %rdx
+; SDAG-X64-NEXT: sarq $63, %rdx
+; SDAG-X64-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SDAG-X64-NEXT: cvttsd2si %xmm0, %rax
+; SDAG-X64-NEXT: andq %rdx, %rax
+; SDAG-X64-NEXT: orq %rcx, %rax
+; SDAG-X64-NEXT: retq
+;
+; GISEL-X64-LABEL: test_double_to_ui64:
+; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-NEXT: cvttsd2si %xmm0, %rcx
+; GISEL-X64-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
+; GISEL-X64-NEXT: subsd %xmm1, %xmm0
+; GISEL-X64-NEXT: cvttsd2si %xmm0, %rdx
+; GISEL-X64-NEXT: movq %rcx, %rax
+; GISEL-X64-NEXT: sarq $63, %rax
+; GISEL-X64-NEXT: andq %rdx, %rax
+; GISEL-X64-NEXT: orq %rcx, %rax
+; GISEL-X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_ui64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptoui double %x to i64
+ ret i64 %conv
+}
+
+define i32 @test_double_to_ui32(double %x) {
+; X64-LABEL: test_double_to_ui32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_ui32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptoui double %x to i32
+ ret i32 %conv
+}
+
+define zeroext i16 @test_double_to_ui16(double %x) {
+; X64-LABEL: test_double_to_ui16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_double_to_ui16:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; SDAG-AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_double_to_ui16:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; GISEL-AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = fptoui double %x to i16
+ ret i16 %conv
+}
+
+define zeroext i8 @test_double_to_ui8(double %x) {
+; X64-LABEL: test_double_to_ui8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_double_to_ui8:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; SDAG-AVX512-NEXT: # kill: def $al killed $al killed $eax
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_double_to_ui8:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; GISEL-AVX512-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = fptoui double %x to i8
+ ret i8 %conv
+}
+
+define i64 @test_float_to_ui64(float %x) {
+; SDAG-X64-LABEL: test_float_to_ui64:
+; SDAG-X64: # %bb.0: # %entry
+; SDAG-X64-NEXT: cvttss2si %xmm0, %rcx
+; SDAG-X64-NEXT: movq %rcx, %rdx
+; SDAG-X64-NEXT: sarq $63, %rdx
+; SDAG-X64-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SDAG-X64-NEXT: cvttss2si %xmm0, %rax
+; SDAG-X64-NEXT: andq %rdx, %rax
+; SDAG-X64-NEXT: orq %rcx, %rax
+; SDAG-X64-NEXT: retq
+;
+; GISEL-X64-LABEL: test_float_to_ui64:
+; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-NEXT: cvttss2si %xmm0, %rcx
+; GISEL-X64-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
+; GISEL-X64-NEXT: subss %xmm1, %xmm0
+; GISEL-X64-NEXT: cvttss2si %xmm0, %rdx
+; GISEL-X64-NEXT: movq %rcx, %rax
+; GISEL-X64-NEXT: sarq $63, %rax
+; GISEL-X64-NEXT: andq %rdx, %rax
+; GISEL-X64-NEXT: orq %rcx, %rax
+; GISEL-X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_ui64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptoui float %x to i64
+ ret i64 %conv
+}
+
+define i32 @test_float_to_ui32(float %x) {
+; X64-LABEL: test_float_to_ui32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_ui32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptoui float %x to i32
+ ret i32 %conv
+}
+
+define zeroext i16 @test_float_to_ui16(float %x) {
+; X64-LABEL: test_float_to_ui16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_float_to_ui16:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvttss2si %xmm0, %eax
+; SDAG-AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_float_to_ui16:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; GISEL-AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = fptoui float %x to i16
+ ret i16 %conv
+}
+
+define zeroext i8 @test_float_to_ui8(float %x) {
+; X64-LABEL: test_float_to_ui8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; SDAG-AVX512-LABEL: test_float_to_ui8:
+; SDAG-AVX512: # %bb.0: # %entry
+; SDAG-AVX512-NEXT: vcvttss2si %xmm0, %eax
+; SDAG-AVX512-NEXT: # kill: def $al killed $al killed $eax
+; SDAG-AVX512-NEXT: retq
+;
+; GISEL-AVX512-LABEL: test_float_to_ui8:
+; GISEL-AVX512: # %bb.0: # %entry
+; GISEL-AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; GISEL-AVX512-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-AVX512-NEXT: retq
+entry:
+ %conv = fptoui float %x to i8
+ ret i8 %conv
+}
+
+define i64 @test_double_to_si64(double %x) {
+; X64-LABEL: test_double_to_si64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %rax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_si64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2si %xmm0, %rax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi double %x to i64
+ ret i64 %conv
+}
+
+define i32 @test_double_to_si32(double %x) {
+; X64-LABEL: test_double_to_si32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_si32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi double %x to i32
+ ret i32 %conv
+}
+
+define signext i16 @test_double_to_si16(double %x) {
+; X64-LABEL: test_double_to_si16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_si16:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi double %x to i16
+ ret i16 %conv
+}
+
+define signext i8 @test_double_to_si8(double %x) {
+; X64-LABEL: test_double_to_si8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_double_to_si8:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi double %x to i8
+ ret i8 %conv
+}
+
+define i64 @test_float_to_si64(float %x) {
+; X64-LABEL: test_float_to_si64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %rax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_si64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2si %xmm0, %rax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi float %x to i64
+ ret i64 %conv
+}
+
+define i32 @test_float_to_si32(float %x) {
+; X64-LABEL: test_float_to_si32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_si32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi float %x to i32
+ ret i32 %conv
+}
+
+define signext i16 @test_float_to_si16(float %x) {
+; X64-LABEL: test_float_to_si16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; AVX512-LABEL: test_float_to_si16:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
+entry:
+ %conv = fptosi float %x to i16
+ ret i16 %co...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/100079
More information about the llvm-commits
mailing list