[llvm] 016f42e - [X86] Add custom lowering for lrint/llrint to either cvtss2si/cvtsd2si or fist.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 4 16:17:16 PST 2020
Author: Craig Topper
Date: 2020-02-04T16:15:40-08:00
New Revision: 016f42e3dcd80431d1a5a17c92f83b3a8a1a601c
URL: https://github.com/llvm/llvm-project/commit/016f42e3dcd80431d1a5a17c92f83b3a8a1a601c
DIFF: https://github.com/llvm/llvm-project/commit/016f42e3dcd80431d1a5a17c92f83b3a8a1a601c.diff
LOG: [X86] Add custom lowering for lrint/llrint to either cvtss2si/cvtsd2si or fist.
lrint/llrint are defined as rounding using the current rounding
mode. Numbers that can't be converted raise FE_INVALID and an
implementation-defined value is returned. They may also write to
errno.

I believe this means we can use cvtss2si/cvtsd2si or fist to
convert as long as -fno-math-errno is passed on the command line.
Clang will leave them as libcalls if errno is enabled, so they
won't become ISD::LRINT/LLRINT in SelectionDAG.

For 64-bit results on a 32-bit target we can't use cvtss2si/cvtsd2si,
but we can use fist since it can write to a 64-bit memory location.
Though maybe we could consider using vcvtps2qq/vcvtpd2qq on avx512dq
targets?

gcc also does this optimization.

I think we might be able to do this with STRICT_LRINT/LLRINT as
well, but I've left that for future work.
Differential Revision: https://reviews.llvm.org/D73859
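
[Editor's illustration, not part of the commit: a minimal IR sketch reduced from the
added tests below; the function name is made up. It shows the pattern this lowering
now handles directly instead of tail-calling lrint.]

; Expected selection after this patch (per the updated tests):
;   llc -mtriple=x86_64-unknown              -> cvtsd2si %xmm0, %rax
;   llc -mtriple=i686-unknown (i64 result)   -> fistpll through a stack slot
declare i64 @llvm.lrint.i64.f64(double)

define i64 @round_f64_to_i64(double %x) {
entry:
  ; Clang only emits llvm.lrint/llvm.llrint when math errno is disabled
  ; (e.g. -fno-math-errno); with errno enabled it stays a libcall.
  %r = tail call i64 @llvm.lrint.i64.f64(double %x)
  ret i64 %r
}
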
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrFPStack.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/llrint-conv.ll
llvm/test/CodeGen/X86/lrint-conv-i32.ll
llvm/test/CodeGen/X86/lrint-conv-i64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 81b4b4833131..b3f5378515f2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -270,6 +270,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
+
+ setOperationAction(ISD::LRINT, MVT::f32, Custom);
+ setOperationAction(ISD::LRINT, MVT::f64, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f32, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f64, Custom);
+
+ if (!Subtarget.is64Bit()) {
+ setOperationAction(ISD::LRINT, MVT::i64, Custom);
+ setOperationAction(ISD::LLRINT, MVT::i64, Custom);
+ }
}
// Handle address space casts between mixed sized pointers.
@@ -663,8 +673,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::f80, Expand);
setOperationAction(ISD::LROUND, MVT::f80, Expand);
setOperationAction(ISD::LLROUND, MVT::f80, Expand);
- setOperationAction(ISD::LRINT, MVT::f80, Expand);
- setOperationAction(ISD::LLRINT, MVT::f80, Expand);
+ setOperationAction(ISD::LRINT, MVT::f80, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f80, Custom);
// Handle constrained floating-point operations of scalar.
setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
@@ -20306,6 +20316,63 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
+SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+
+ // If the source is in an SSE register, the node is Legal.
+ if (isScalarFPTypeInSSEReg(SrcVT))
+ return Op;
+
+ return LRINT_LLRINTHelper(Op.getNode(), DAG);
+}
+
+SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
+ SelectionDAG &DAG) const {
+ EVT DstVT = N->getValueType(0);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ if (SrcVT != MVT::f32 && SrcVT != MVT::f64 && SrcVT != MVT::f80) {
+ // f16 must be promoted before using the lowering in this routine.
+ // fp128 does not use this lowering.
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ SDValue Chain = DAG.getEntryNode();
+
+ bool UseSSE = isScalarFPTypeInSSEReg(SrcVT);
+
+ // If we're converting from SSE, the stack slot needs to hold both types.
+ // Otherwise it only needs to hold the DstVT.
+ EVT OtherVT = UseSSE ? SrcVT : DstVT;
+ SDValue StackPtr = DAG.CreateStackTemporary(DstVT, OtherVT);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+ if (UseSSE) {
+ assert(DstVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!");
+ Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI);
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue Ops[] = { Chain, StackPtr };
+
+ Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI,
+ /*Align*/0, MachineMemOperand::MOLoad);
+ Chain = Src.getValue(1);
+ }
+
+ SDValue StoreOps[] = { Chain, Src, StackPtr };
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL,
+ DAG.getVTList(MVT::Other), StoreOps,
+ DstVT, MPI, /*Align*/0,
+ MachineMemOperand::MOStore);
+
+ return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
+}
+
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
@@ -28637,6 +28704,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
+ case ISD::LRINT:
+ case ISD::LLRINT: return LowerLRINT_LLRINT(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG);
@@ -29191,6 +29260,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::LRINT:
+ case ISD::LLRINT: {
+ if (SDValue V = LRINT_LLRINTHelper(N, DAG))
+ Results.push_back(V);
+ return;
+ }
+
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 6f3aad83e2dd..62b176679fd6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1334,6 +1334,7 @@ namespace llvm {
SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned,
SDValue &Chain) const;
+ SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -1357,6 +1358,7 @@ namespace llvm {
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 3776eadfbe81..4c2ee5d4d721 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7233,6 +7233,45 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2u
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
+ X86VectorVTInfo DstVT, SDNode OpNode,
+ X86FoldableSchedWrite sched,
+ string aliasStr> {
+ let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
+ let isCodeGenOnly = 1 in {
+ def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
+ EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+ def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ }
+ } // Predicates = [HasAVX512]
+}
+
+defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
+ lrint, WriteCvtSS2I,
+ "{l}">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
+ llrint, WriteCvtSS2I,
+ "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
+ lrint, WriteCvtSD2I,
+ "{l}">, XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
+ llrint, WriteCvtSD2I,
+ "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
+ def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
+
+ def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
+ def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
+}
+
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index ab9112fde8f3..b675aeb36fbb 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -74,6 +74,11 @@ def X86fild64 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;
+def X86fist32 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fist node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
(X86fist node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
@@ -525,14 +530,20 @@ def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
let mayStore = 1, hasSideEffects = 0 in {
def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+ [(X86fist32 RFP32:$src, addr:$op)]>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
+ [(X86fist64 RFP32:$src, addr:$op)]>;
def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
+ [(X86fist32 RFP64:$src, addr:$op)]>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
+ [(X86fist64 RFP64:$src, addr:$op)]>;
def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
-def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
-def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
+ [(X86fist32 RFP80:$src, addr:$op)]>;
+def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
+ [(X86fist64 RFP80:$src, addr:$op)]>;
} // mayStore
} // SchedRW, Uses = [FPCW]
@@ -791,9 +802,6 @@ def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
-// Used to conv. between f80 and i64 for i64 atomic loads.
-def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
-
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 1775d7570daf..f7e0717952ac 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -884,6 +884,23 @@ defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf6
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>,
XD, VEX, VEX_W, VEX_LIG;
+
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
+ "cvtss2si", "cvtss2si",
+ WriteCvtSS2I, SSEPackedSingle>,
+ XS, VEX, VEX_LIG;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
+ "cvtss2si", "cvtss2si",
+ WriteCvtSS2I, SSEPackedSingle>,
+ XS, VEX, VEX_W, VEX_LIG;
+defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
+ "cvtsd2si", "cvtsd2si",
+ WriteCvtSD2I, SSEPackedDouble>,
+ XD, VEX, VEX_LIG;
+defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
+ "cvtsd2si", "cvtsd2si",
+ WriteCvtSD2I, SSEPackedDouble>,
+ XD, VEX, VEX_W, VEX_LIG;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
@@ -923,6 +940,12 @@ let Predicates = [UseAVX] in {
(VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
(VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+
+ def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>;
+ def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>;
+
+ def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>;
+ def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>;
}
let isCodeGenOnly = 1 in {
@@ -938,6 +961,20 @@ defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
+
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
+ "cvtss2si", "cvtss2si",
+ WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
+defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
+ "cvtss2si", "cvtss2si",
+ WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
+defm CVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
+ "cvtsd2si", "cvtsd2si",
+ WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
+defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
+ "cvtsd2si", "cvtsd2si",
+ WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
+
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
"cvtsi2ss", "cvtsi2ss{l}",
WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
@@ -952,6 +989,16 @@ defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1
+let Predicates = [UseSSE1] in {
+ def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>;
+ def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>;
+}
+
+let Predicates = [UseSSE2] in {
+ def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>;
+ def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>;
+}
+
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll
index 1472af1639b2..402daf80a15e 100644
--- a/llvm/test/CodeGen/X86/llrint-conv.ll
+++ b/llvm/test/CodeGen/X86/llrint-conv.ll
@@ -1,65 +1,153 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
+; RUN: llc < %s -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
define i64 @testmsxs(float %x) {
-; X86-LABEL: testmsxs:
-; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %eax
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: flds {{[0-9]+}}(%esp)
-; X86-NEXT: fstps (%esp)
-; X86-NEXT: calll llrintf
-; X86-NEXT: popl %ecx
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
+; X86-NOSSE-LABEL: testmsxs:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
+; X86-NOSSE-NEXT: .cfi_offset %ebp, -8
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: .cfi_def_cfa_register %ebp
+; X86-NOSSE-NEXT: andl $-8, %esp
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: flds 8(%ebp)
+; X86-NOSSE-NEXT: fistpll (%esp)
+; X86-NOSSE-NEXT: movl (%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: .cfi_def_cfa %esp, 4
+; X86-NOSSE-NEXT: retl
;
-; SSE2-LABEL: testmsxs:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pushl %eax
-; SSE2-NEXT: .cfi_def_cfa_offset 8
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss %xmm0, (%esp)
-; SSE2-NEXT: calll llrintf
-; SSE2-NEXT: popl %ecx
-; SSE2-NEXT: .cfi_def_cfa_offset 4
-; SSE2-NEXT: retl
+; X86-SSE2-LABEL: testmsxs:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: .cfi_offset %ebp, -8
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss %xmm0, (%esp)
+; X86-SSE2-NEXT: flds (%esp)
+; X86-SSE2-NEXT: fistpll (%esp)
+; X86-SSE2-NEXT: movl (%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: retl
;
-; X64-LABEL: testmsxs:
-; X64: # %bb.0: # %entry
-; X64-NEXT: jmp llrintf # TAILCALL
+; X86-AVX-LABEL: testmsxs:
+; X86-AVX: # %bb.0: # %entry
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: .cfi_def_cfa_offset 8
+; X86-AVX-NEXT: .cfi_offset %ebp, -8
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: .cfi_def_cfa_register %ebp
+; X86-AVX-NEXT: andl $-8, %esp
+; X86-AVX-NEXT: subl $8, %esp
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX-NEXT: flds (%esp)
+; X86-AVX-NEXT: fistpll (%esp)
+; X86-AVX-NEXT: movl (%esp), %eax
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: testmsxs:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: cvtss2si %xmm0, %rax
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: testmsxs:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: vcvtss2si %xmm0, %rax
+; X64-AVX-NEXT: retq
entry:
%0 = tail call i64 @llvm.llrint.f32(float %x)
ret i64 %0
}
define i64 @testmsxd(double %x) {
-; X86-LABEL: testmsxd:
-; X86: # %bb.0: # %entry
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 12
-; X86-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NEXT: fstpl (%esp)
-; X86-NEXT: calll llrint
-; X86-NEXT: addl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
+; X86-NOSSE-LABEL: testmsxd:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
+; X86-NOSSE-NEXT: .cfi_offset %ebp, -8
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: .cfi_def_cfa_register %ebp
+; X86-NOSSE-NEXT: andl $-8, %esp
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: fldl 8(%ebp)
+; X86-NOSSE-NEXT: fistpll (%esp)
+; X86-NOSSE-NEXT: movl (%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: .cfi_def_cfa %esp, 4
+; X86-NOSSE-NEXT: retl
;
-; SSE2-LABEL: testmsxd:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: subl $8, %esp
-; SSE2-NEXT: .cfi_def_cfa_offset 12
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: movsd %xmm0, (%esp)
-; SSE2-NEXT: calll llrint
-; SSE2-NEXT: addl $8, %esp
-; SSE2-NEXT: .cfi_def_cfa_offset 4
-; SSE2-NEXT: retl
+; X86-SSE2-LABEL: testmsxd:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: .cfi_offset %ebp, -8
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: fldl (%esp)
+; X86-SSE2-NEXT: fistpll (%esp)
+; X86-SSE2-NEXT: movl (%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: retl
;
-; X64-LABEL: testmsxd:
-; X64: # %bb.0: # %entry
-; X64-NEXT: jmp llrint # TAILCALL
+; X86-AVX-LABEL: testmsxd:
+; X86-AVX: # %bb.0: # %entry
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: .cfi_def_cfa_offset 8
+; X86-AVX-NEXT: .cfi_offset %ebp, -8
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: .cfi_def_cfa_register %ebp
+; X86-AVX-NEXT: andl $-8, %esp
+; X86-AVX-NEXT: subl $8, %esp
+; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
+; X86-AVX-NEXT: fldl (%esp)
+; X86-AVX-NEXT: fistpll (%esp)
+; X86-AVX-NEXT: movl (%esp), %eax
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: testmsxd:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: cvtsd2si %xmm0, %rax
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: testmsxd:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax
+; X64-AVX-NEXT: retq
entry:
%0 = tail call i64 @llvm.llrint.f64(double %x)
ret i64 %0
@@ -68,29 +156,28 @@ entry:
define i64 @testmsll(x86_fp80 %x) {
; X86-LABEL: testmsll:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $12, %esp
-; X86-NEXT: .cfi_def_cfa_offset 16
-; X86-NEXT: fldt {{[0-9]+}}(%esp)
-; X86-NEXT: fstpt (%esp)
-; X86-NEXT: calll llrintl
-; X86-NEXT: addl $12, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: fldt 8(%ebp)
+; X86-NEXT: fistpll (%esp)
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
-; SSE2-LABEL: testmsll:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: subl $12, %esp
-; SSE2-NEXT: .cfi_def_cfa_offset 16
-; SSE2-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE2-NEXT: fstpt (%esp)
-; SSE2-NEXT: calll llrintl
-; SSE2-NEXT: addl $12, %esp
-; SSE2-NEXT: .cfi_def_cfa_offset 4
-; SSE2-NEXT: retl
-;
; X64-LABEL: testmsll:
; X64: # %bb.0: # %entry
-; X64-NEXT: jmp llrintl # TAILCALL
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fistpll -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X64-NEXT: retq
entry:
%0 = tail call i64 @llvm.llrint.f80(x86_fp80 %x)
ret i64 %0
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
index 888160112e53..21580f53ec9b 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
@@ -1,30 +1,102 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
+; RUN: llc < %s -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
define i32 @testmsws(float %x) {
-; CHECK-LABEL: testmsws:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lrintf # TAILCALL
+; X86-NOSSE-LABEL: testmsws:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
+; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fistpl (%esp)
+; X86-NOSSE-NEXT: movl (%esp), %eax
+; X86-NOSSE-NEXT: popl %ecx
+; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: testmsws:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: testmsws:
+; X86-AVX: # %bb.0: # %entry
+; X86-AVX-NEXT: vcvtss2si {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: testmsws:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: cvtss2si %xmm0, %eax
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: testmsws:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: vcvtss2si %xmm0, %eax
+; X64-AVX-NEXT: retq
entry:
%0 = tail call i32 @llvm.lrint.i32.f32(float %x)
ret i32 %0
}
define i32 @testmswd(double %x) {
-; CHECK-LABEL: testmswd:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lrint # TAILCALL
+; X86-NOSSE-LABEL: testmswd:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
+; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fistpl (%esp)
+; X86-NOSSE-NEXT: movl (%esp), %eax
+; X86-NOSSE-NEXT: popl %ecx
+; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: testmswd:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: testmswd:
+; X86-AVX: # %bb.0: # %entry
+; X86-AVX-NEXT: vcvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: testmswd:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: cvtsd2si %xmm0, %eax
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: testmswd:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: vcvtsd2si %xmm0, %eax
+; X64-AVX-NEXT: retq
entry:
%0 = tail call i32 @llvm.lrint.i32.f64(double %x)
ret i32 %0
}
define i32 @testmsll(x86_fp80 %x) {
-; CHECK-LABEL: testmsll:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lrintl # TAILCALL
+; X86-LABEL: testmsll:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fistpl (%esp)
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: popl %ecx
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: testmsll:
+; X64: # %bb.0: # %entry
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fistpl -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT: retq
entry:
%0 = tail call i32 @llvm.lrint.i32.f80(x86_fp80 %x)
ret i32 %0
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i64.ll b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
index 1b12edd43c51..01b0af2f807f 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i64.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
@@ -1,19 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX
define i64 @testmsxs(float %x) {
-; CHECK-LABEL: testmsxs:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lrintf # TAILCALL
+; SSE-LABEL: testmsxs:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: cvtss2si %xmm0, %rax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: testmsxs:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtss2si %xmm0, %rax
+; AVX-NEXT: retq
entry:
%0 = tail call i64 @llvm.lrint.i64.f32(float %x)
ret i64 %0
}
define i64 @testmsxd(double %x) {
-; CHECK-LABEL: testmsxd:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lrint # TAILCALL
+; SSE-LABEL: testmsxd:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: cvtsd2si %xmm0, %rax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: testmsxd:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtsd2si %xmm0, %rax
+; AVX-NEXT: retq
entry:
%0 = tail call i64 @llvm.lrint.i64.f64(double %x)
ret i64 %0
@@ -22,7 +36,10 @@ entry:
define i64 @testmsll(x86_fp80 %x) {
; CHECK-LABEL: testmsll:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lrintl # TAILCALL
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fistpll -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: retq
entry:
%0 = tail call i64 @llvm.lrint.i64.f80(x86_fp80 %x)
ret i64 %0