[llvm] r257285 - Optimized instruction sequence for sitofp operation on X86-32
Elena Demikhovsky via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 10 01:41:23 PST 2016
Author: delena
Date: Sun Jan 10 03:41:22 2016
New Revision: 257285
URL: http://llvm.org/viewvc/llvm-project?rev=257285&view=rev
Log:
Optimized instruction sequence for sitofp operation on X86-32
Optimized the code generated for 'sitofp i64 %x to double' on X86-32. The previous sequence
movl %ecx, 8(%esp)
movl %edx, 12(%esp)
fildll 8(%esp)
is replaced with:
movd %ecx, %xmm0
movd %edx, %xmm1
punpckldq %xmm1, %xmm0
movq %xmm0, 8(%esp)
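
The fildll load itself is unchanged; the gain is that the 64-bit value is now
written to the stack slot with a single 64-bit store from an XMM register,
avoiding the store forwarding penalty that the two 32-bit stores incurred
(see the comments added to the lowering code below). A minimal IR snippet
that exercises this path, assuming a 32-bit SSE2 target (function name and
llc invocation are illustrative, e.g. llc -march=x86 -mattr=+sse2):

; illustrative example, not part of the committed tests
define double @s64_to_d_example(i64 %a) nounwind {
  %r = sitofp i64 %a to double
  ret double %r
}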
Differential Revision: http://reviews.llvm.org/D15946
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll
llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=257285&r1=257284&r2=257285&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Jan 10 03:41:22 2016
@@ -265,7 +265,8 @@ X86TargetLowering::X86TargetLowering(con
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
- }
+ } else if (!Subtarget->is64Bit())
+ setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
@@ -12672,13 +12673,21 @@ SDValue X86TargetLowering::LowerSINT_TO_
return Op;
}
+ SDValue ValueToStore = Op.getOperand(0);
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ !Subtarget->is64Bit())
+ // Bitcasting to f64 here allows us to do a single 64-bit store from
+ // an SSE register, avoiding the store forwarding penalty that would come
+ // with two 32-bit stores.
+ ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = DAG.getStore(
- DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot,
+ DAG.getEntryNode(), dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false,
false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
@@ -13051,7 +13060,13 @@ SDValue X86TargetLowering::LowerUINT_TO_
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ SDValue ValueToStore = Op.getOperand(0);
+ if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit())
+ // Bitcasting to f64 here allows us to do a single 64-bit store from
+ // an SSE register, avoiding the store forwarding penalty that would come
+ // with two 32-bit stores.
+ ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore,
StackSlot, MachinePointerInfo(),
false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
@@ -19536,24 +19551,37 @@ static SDValue LowerBITCAST(SDValue Op,
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
- if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) {
+ if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
+ SrcVT == MVT::i64) {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
if (DstVT != MVT::f64)
// This conversion needs to be expanded.
return SDValue();
- SDValue InVec = Op->getOperand(0);
- SDLoc dl(Op);
- unsigned NumElts = SrcVT.getVectorNumElements();
- MVT SVT = SrcVT.getVectorElementType();
-
- // Widen the vector in input in the case of MVT::v2i32.
- // Example: from MVT::v2i32 to MVT::v4i32.
+ SDValue Op0 = Op->getOperand(0);
SmallVector<SDValue, 16> Elts;
- for (unsigned i = 0, e = NumElts; i != e; ++i)
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec,
- DAG.getIntPtrConstant(i, dl)));
-
+ SDLoc dl(Op);
+ unsigned NumElts;
+ MVT SVT;
+ if (SrcVT.isVector()) {
+ NumElts = SrcVT.getVectorNumElements();
+ SVT = SrcVT.getVectorElementType();
+
+ // Widen the vector in input in the case of MVT::v2i32.
+ // Example: from MVT::v2i32 to MVT::v4i32.
+ for (unsigned i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0,
+ DAG.getIntPtrConstant(i, dl)));
+ } else {
+ assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() &&
+ "Unexpected source type in LowerBITCAST");
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
+ DAG.getIntPtrConstant(0, dl)));
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
+ DAG.getIntPtrConstant(1, dl)));
+ NumElts = 2;
+ SVT = MVT::i32;
+ }
// Explicitly mark the extra elements as Undef.
Elts.append(NumElts, DAG.getUNDEF(SVT));
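
Note that the scalar-i64 case added to LowerBITCAST above also handles a
plain i64-to-double bitcast on 32-bit targets with SSE2: the i64 is split
into its two i32 halves with ISD::EXTRACT_ELEMENT and rebuilt as a vector
in an XMM register, just like the existing v2i32/v4i16/v8i8 cases. A sketch
of IR that would hit this path (illustrative only; not one of the committed
tests):

; illustrative example, assuming a 32-bit SSE2 target
define double @i64_to_f64_bitcast(i64 %x) nounwind {
  %d = bitcast i64 %x to double
  ret double %d
}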
Modified: llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll?rev=257285&r1=257284&r2=257285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll Sun Jan 10 03:41:22 2016
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 13
define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
entry:
Modified: llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll?rev=257285&r1=257284&r2=257285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll Sun Jan 10 03:41:22 2016
@@ -74,9 +74,16 @@ define x86_fp80 @s32_to_x(i32 %a) nounwi
}
; CHECK-LABEL: u64_to_f
+; AVX512_32: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
; AVX512_32: fildll
+
; AVX512_64: vcvtusi2ssq
+
+; SSE2_32: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2_32: movq %xmm0, {{[0-9]+}}(%esp)
; SSE2_32: fildll
+
; SSE2_64: cvtsi2ssq
; X87: fildll
define float @u64_to_f(i64 %a) nounwind {
@@ -95,6 +102,24 @@ define float @s64_to_f(i64 %a) nounwind
ret float %r
}
+; CHECK-LABEL: s64_to_f_2
+; SSE2_32: movd %ecx, %xmm0
+; SSE2_32: movd %eax, %xmm1
+; SSE2_32: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2_32: movq %xmm1, {{[0-9]+}}(%esp)
+; SSE2_32: fildll {{[0-9]+}}(%esp)
+
+; AVX512_32: vmovd %eax, %xmm0
+; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32: fildll {{[0-9]+}}(%esp)
+
+define float @s64_to_f_2(i64 %a) nounwind {
+ %a1 = add i64 %a, 5
+ %r = sitofp i64 %a1 to float
+ ret float %r
+}
+
; CHECK-LABEL: u64_to_d
; AVX512_32: vpunpckldq
; AVX512_64: vcvtusi2sdq
@@ -117,6 +142,24 @@ define double @s64_to_d(i64 %a) nounwind
ret double %r
}
+; CHECK-LABEL: s64_to_d_2
+; SSE2_32: movd %ecx, %xmm0
+; SSE2_32: movd %eax, %xmm1
+; SSE2_32: punpckldq %xmm0, %xmm1
+; SSE2_32: movq %xmm1, {{[0-9]+}}(%esp)
+; SSE2_32: fildll
+
+; AVX512_32: vmovd %eax, %xmm0
+; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32: fildll
+
+define double @s64_to_d_2(i64 %a) nounwind {
+ %b = add i64 %a, 5
+ %f = sitofp i64 %b to double
+ ret double %f
+}
+
; CHECK-LABEL: u64_to_x
; CHECK: fildll
define x86_fp80 @u64_to_x(i64 %a) nounwind {