[llvm] r245924 - [X86] Fix fptoui conversions
Michael Kuperstein via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 25 00:42:33 PDT 2015
Author: mkuper
Date: Tue Aug 25 02:42:09 2015
New Revision: 245924
URL: http://llvm.org/viewvc/llvm-project?rev=245924&view=rev
Log:
[X86] Fix fptoui conversions
This fixes two issues in x86 fptoui lowering.
1) Makes conversions from f80 go through the right path on AVX-512.
2) Implements an inline sequence for fptoui i64 instead of a library
call. This improves performance by 6X on SSE3+ and 3X otherwise.
Incidentally, it also removes the use of ftol2 for fptoui, which was
wrong to begin with, as ftol2 converts to a signed i64, producing
wrong results for values >= 2^63.
Patch by: mitch.l.bodart at intel.com
Differential Revision: http://reviews.llvm.org/D11316
Added:
llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/test/CodeGen/X86/pr17631.ll
llvm/trunk/test/CodeGen/X86/win_ftol2.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=245924&r1=245923&r2=245924&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Aug 25 02:42:09 2015
@@ -114,13 +114,6 @@ X86TargetLowering::X86TargetLowering(con
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
-
- // The _ftol2 runtime function has an unusual calling conv, which
- // is modeled by a special pseudo-instruction.
- setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
- setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
- setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
- setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
}
if (Subtarget->isTargetDarwin()) {
@@ -228,8 +221,14 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
+ // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
+ }
} else if (!Subtarget->useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
@@ -238,14 +237,11 @@ X86TargetLowering::X86TargetLowering(con
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
+ // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
- }
- if (isTargetFTOL()) {
- // Use the _ftol2 runtime function, which has a pseudo-instruction
- // to handle its weird calling convention.
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
}
@@ -1330,13 +1326,10 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ // FIXME: [US]INT_TO_FP are not legal for f80.
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
}
@@ -12334,15 +12327,45 @@ SDValue X86TargetLowering::LowerUINT_TO_
DAG.getIntPtrConstant(0, dl));
}
+// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
+// is legal, or has an f16 source (which needs to be promoted to f32),
+// just return an <SDValue(), SDValue()> pair.
+// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
+// to i16, i32 or i64, and we lower it to a legal sequence.
+// If lowered to the final integer result we return a <result, SDValue()> pair.
+// Otherwise we lower it to a sequence ending with a FIST, return a
+// <FIST, StackSlot> pair, and the caller is responsible for loading
+// the final integer result from StackSlot.
std::pair<SDValue,SDValue>
-X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
- bool IsSigned, bool IsReplace) const {
+X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsReplace) const {
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
+ EVT TheVT = Op.getOperand(0).getValueType();
auto PtrVT = getPointerTy(DAG.getDataLayout());
- if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
+ if (TheVT == MVT::f16)
+ // We need to promote the f16 to f32 before using the lowering
+ // in this routine.
+ return std::make_pair(SDValue(), SDValue());
+
+ assert((TheVT == MVT::f32 ||
+ TheVT == MVT::f64 ||
+ TheVT == MVT::f80) &&
+ "Unexpected FP operand type in FP_TO_INTHelper");
+
+ // If using FIST to compute an unsigned i64, we'll need some fixup
+ // to handle values above the maximum signed i64. A FIST is always
+ // used for the 32-bit subtarget, but also for f80 on a 64-bit target.
+ bool UnsignedFixup = !IsSigned &&
+ DstTy == MVT::i64 &&
+ (!Subtarget->is64Bit() ||
+ !isScalarFPTypeInSSEReg(TheVT));
+
+ if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) {
+ // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
+ // The low 32 bits of the fist result will have the correct uint32 result.
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
@@ -12360,27 +12383,72 @@ X86TargetLowering:: FP_TO_INTHelper(SDVa
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
- // We lower FP->int64 either into FISTP64 followed by a load from a temporary
- // stack slot, or into the FTOL runtime function.
+ // We lower FP->int64 into FISTP64 followed by a load from a temporary
+ // stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
unsigned Opc;
- if (!IsSigned && isIntegerTypeFTOL(DstTy))
- Opc = X86ISD::WIN_FTOL;
- else
- switch (DstTy.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
- case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
- case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
- case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
- }
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
- EVT TheVT = Op.getOperand(0).getValueType();
+ SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
+
+ if (UnsignedFixup) {
+ //
+ // Conversion to unsigned i64 is implemented with a select,
+ // depending on whether the source value fits in the range
+ // of a signed i64. Let Thresh be the FP equivalent of
+ // 0x8000000000000000ULL.
+ //
+ // Adjust i32 = (Value < Thresh) ? 0 : 0x80000000;
+ // FistSrc = (Value < Thresh) ? Value : (Value - Thresh);
+ // Fist-to-mem64 FistSrc
+ // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
+ // to XOR'ing the high 32 bits with Adjust.
+ //
+ // Being a power of 2, Thresh is exactly representable in all FP formats.
+ // For X87 we'd like to use the smallest FP type for this constant, but
+ // for DAG type consistency we have to match the FP operand type.
+
+ APFloat Thresh(APFloat::IEEEsingle, APInt(32, 0x5f000000));
+ APFloat::opStatus Status = APFloat::opOK;
+ bool LosesInfo = false;
+ if (TheVT == MVT::f64)
+ // The rounding mode is irrelevant as the conversion should be exact.
+ Status = Thresh.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &LosesInfo);
+ else if (TheVT == MVT::f80)
+ Status = Thresh.convert(APFloat::x87DoubleExtended,
+ APFloat::rmNearestTiesToEven, &LosesInfo);
+
+ assert(Status == APFloat::opOK && !LosesInfo &&
+ "FP conversion should have been exact");
+
+ SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
+
+ SDValue Cmp = DAG.getSetCC(DL,
+ getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TheVT),
+ Value, ThreshVal, ISD::SETLT);
+ Adjust = DAG.getSelect(DL, MVT::i32, Cmp,
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0x80000000, DL, MVT::i32));
+ SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
+ Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TheVT),
+ Value, ThreshVal, ISD::SETLT);
+ Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
+ }
+
// FIXME This causes a redundant load/store if the SSE-class value is already
// in memory, such as if it is on the callstack.
if (isScalarFPTypeInSSEReg(TheVT)) {
@@ -12406,25 +12474,49 @@ X86TargetLowering:: FP_TO_INTHelper(SDVa
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
MachineMemOperand::MOStore, MemSize, MemSize);
- if (Opc != X86ISD::WIN_FTOL) {
+ if (UnsignedFixup) {
+
+ // Insert the FIST, load its result as two i32's,
+ // and XOR the high i32 with Adjust.
+
+ SDValue FistOps[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ FistOps, DstTy, MMO);
+
+ SDValue Low32 = DAG.getLoad(MVT::i32, DL, FIST, StackSlot,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackSlot,
+ DAG.getConstant(4, DL, PtrVT));
+
+ SDValue High32 = DAG.getLoad(MVT::i32, DL, FIST, HighAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);
+
+ if (Subtarget->is64Bit()) {
+ // Join High32 and Low32 into a 64-bit result.
+ // (High32 << 32) | Low32
+ Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
+ High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32);
+ High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32,
+ DAG.getConstant(32, DL, MVT::i8));
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32);
+ return std::make_pair(Result, SDValue());
+ }
+
+ SDValue ResultOps[] = { Low32, High32 };
+
+ SDValue pair = IsReplace
+ ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps)
+ : DAG.getMergeValues(ResultOps, DL);
+ return std::make_pair(pair, SDValue());
+ } else {
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
Ops, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
- } else {
- SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
- DAG.getVTList(MVT::Other, MVT::Glue),
- Chain, Value);
- SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
- MVT::i32, ftol.getValue(1));
- SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
- MVT::i32, eax.getValue(2));
- SDValue Ops[] = { eax, edx };
- SDValue pair = IsReplace
- ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops)
- : DAG.getMergeValues(Ops, DL);
- return std::make_pair(pair, SDValue());
}
}
@@ -12688,7 +12780,8 @@ SDValue X86TargetLowering::LowerFP_TO_SI
/*IsSigned=*/ true, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
- if (!FIST.getNode()) return Op;
+ if (!FIST.getNode())
+ return Op;
if (StackSlot.getNode())
// Load the result.
@@ -12705,7 +12798,9 @@ SDValue X86TargetLowering::LowerFP_TO_UI
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
/*IsSigned=*/ false, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
- assert(FIST.getNode() && "Unexpected failure");
+ // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
+ if (!FIST.getNode())
+ return Op;
if (StackSlot.getNode())
// Load the result.
@@ -18885,17 +18980,9 @@ void X86TargetLowering::ReplaceNodeResul
return;
}
case ISD::FP_TO_SINT:
- // FP_TO_INT*_IN_MEM is not legal for f16 inputs. Do not convert
- // (FP_TO_SINT (load f16)) to FP_TO_INT*.
- if (N->getOperand(0).getValueType() == MVT::f16)
- break;
- // fallthrough
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
- if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
- return;
-
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -26507,10 +26594,6 @@ int X86TargetLowering::getScalingFactorC
return -1;
}
-bool X86TargetLowering::isTargetFTOL() const {
- return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit();
-}
-
bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
// Integer division on x86 is expensive. However, when aggressively optimizing
// for code size, we prefer to use a div instruction, as it is usually smaller
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=245924&r1=245923&r2=245924&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Aug 25 02:42:09 2015
@@ -856,15 +856,6 @@ namespace llvm {
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- /// Return true if the target uses the MSVC _ftol2 routine for fptoui.
- bool isTargetFTOL() const;
-
- /// Return true if the MSVC _ftol2 routine should be used for fptoui to the
- /// given type.
- bool isIntegerTypeFTOL(EVT VT) const {
- return isTargetFTOL() && VT == MVT::i64;
- }
-
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Modified: llvm/trunk/test/CodeGen/X86/pr17631.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr17631.ll?rev=245924&r1=245923&r2=245924&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr17631.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr17631.ll Tue Aug 25 02:42:09 2015
@@ -30,5 +30,5 @@ define <8 x float> @foo(<8 x float> %y,
; CHECK: foo
; CHECK-NOT: vzeroupper
-; CHECK: _ftol2
+; CHECK: {{cvtt|fist}}
; CHECK: ret
Added: llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll?rev=245924&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll (added)
+++ llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll Tue Aug 25 02:42:09 2015
@@ -0,0 +1,135 @@
+; Check that scalar FP conversions to signed and unsigned int64 are using
+; reasonable sequences, across platforms and target switches.
+;
+; The signed case is straight forward, and the tests here basically
+; ensure successful compilation (f80 with avx512 was broken at one point).
+;
+; For the unsigned case there are many possible sequences, so to avoid
+; a fragile test we just check for the presence of a few key instructions.
+; AVX512 on Intel64 can use vcvtts[ds]2usi directly for float and double.
+; Otherwise the sequence will involve an FP subtract (fsub, subss or subsd),
+; and a truncating conversion (cvtts[ds]2si, fisttp, or fnstcw+fist). When
+; both a subtract and fnstcw are needed, they can occur in either order.
+;
+; The interesting subtargets are AVX512F (vcvtts[ds]2usi), SSE3 (fisttp),
+; SSE2 (cvtts[ds]2si) and vanilla X87 (fnstcw+fist, 32-bit only).
+;
+; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64
+; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_32
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_32
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_64
+; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64
+; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=-sse | FileCheck %s --check-prefix=CHECK --check-prefix=X87
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=-sse | FileCheck %s --check-prefix=CHECK --check-prefix=X87
+
+; CHECK-LABEL: f_to_u64
+; X87-DAG: fsub
+; X87-DAG: fnstcw
+; X87: fist
+; SSE2_32-DAG: {{subss|fsub}}
+; SSE2_32-DAG: fnstcw
+; SSE2_32: fist
+; SSE2_64: subss
+; SSE2_64: cvttss2si
+; SSE3_32: {{subss|fsub}}
+; SSE3_32: fistt
+; SSE3_64: subss
+; SSE3_64: cvttss2si
+; AVX512_32: {{subss|fsub}}
+; AVX512_32: fistt
+; AVX512_64: vcvttss2usi
+; CHECK: ret
+define i64 @f_to_u64(float %a) nounwind {
+ %r = fptoui float %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: f_to_s64
+; X87: fnstcw
+; X87: fist
+; SSE2_32: fnstcw
+; SSE2_32: fist
+; SSE2_64: cvttss2si
+; SSE3_32: fistt
+; SSE3_64: cvttss2si
+; AVX512_32: fistt
+; AVX512_64: vcvttss2si
+; CHECK: ret
+define i64 @f_to_s64(float %a) nounwind {
+ %r = fptosi float %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: d_to_u64
+; X87-DAG: fsub
+; X87-DAG: fnstcw
+; X87: fist
+; SSE2_32-DAG: {{subsd|fsub}}
+; SSE2_32-DAG: fnstcw
+; SSE2_32: fist
+; SSE2_64: subsd
+; SSE2_64: cvttsd2si
+; SSE3_32: {{subsd|fsub}}
+; SSE3_32: fistt
+; SSE3_64: subsd
+; SSE3_64: cvttsd2si
+; AVX512_32: {{subsd|fsub}}
+; AVX512_32: fistt
+; AVX512_64: vcvttsd2usi
+; CHECK: ret
+define i64 @d_to_u64(double %a) nounwind {
+ %r = fptoui double %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: d_to_s64
+; X87: fnstcw
+; X87: fist
+; SSE2_32: fnstcw
+; SSE2_32: fist
+; SSE2_64: cvttsd2si
+; SSE3_32: fistt
+; SSE3_64: cvttsd2si
+; AVX512_32: fistt
+; AVX512_64: vcvttsd2si
+; CHECK: ret
+define i64 @d_to_s64(double %a) nounwind {
+ %r = fptosi double %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: x_to_u64
+; CHECK-DAG: fsub
+; X87-DAG: fnstcw
+; SSE2_32-DAG: fnstcw
+; SSE2_64-DAG: fnstcw
+; CHECK: fist
+; CHECK: ret
+define i64 @x_to_u64(x86_fp80 %a) nounwind {
+ %r = fptoui x86_fp80 %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: x_to_s64
+; X87: fnstcw
+; X87: fist
+; SSE2_32: fnstcw
+; SSE2_32: fist
+; SSE2_64: fnstcw
+; SSE2_64: fist
+; SSE3_32: fistt
+; SSE3_64: fistt
+; AVX512_32: fistt
+; AVX512_64: fistt
+; CHECK: ret
+define i64 @x_to_s64(x86_fp80 %a) nounwind {
+ %r = fptosi x86_fp80 %a to i64
+ ret i64 %r
+}
Modified: llvm/trunk/test/CodeGen/X86/win_ftol2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/win_ftol2.ll?rev=245924&r1=245923&r2=245924&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/win_ftol2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/win_ftol2.ll Tue Aug 25 02:42:09 2015
@@ -1,15 +1,17 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=FTOL
+; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
-; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2
+; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
-; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This
-; function has a nonstandard calling convention: the input value is expected on
-; the x87 stack instead of the callstack. The input value is popped by the
-; callee. Mingw32 uses normal cdecl compiler-rt functions.
+; This test originally used the FTOL and FTOL_2 checks for the i686-pc-win32
+; triples, under the assumption that Win32 targets should use the MSVCRT
+; _ftol2 runtime function for fptoui to i64. That usage was incorrect,
+; as _ftol2 performs conversion to signed i64. As of the compiler fix,
+; the FTOL/FTOL_2 checks are no longer used, which basically renders
+; this test meaningless.
define i64 @double_ui64(double %x) nounwind {
entry:
More information about the llvm-commits
mailing list