[llvm] r210870 - [FastISel][X86] Add support for cvttss2si/cvttsd2si intrinsics.
Juergen Ributzka
juergen at apple.com
Thu Jun 12 19:21:58 PDT 2014
Author: ributzka
Date: Thu Jun 12 21:21:58 2014
New Revision: 210870
URL: http://llvm.org/viewvc/llvm-project?rev=210870&view=rev
Log:
[FastISel][X86] Add support for cvttss2si/cvttsd2si intrinsics.
This adds support for the cvttss2si/cvttsd2si intrinsics. Preceding
insertelement instructions are folded into the conversion instruction (if
possible).
Added:
llvm/trunk/test/CodeGen/X86/fast-isel-sse12-fptoint.ll
Modified:
llvm/trunk/lib/Target/X86/X86FastISel.cpp
Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=210870&r1=210869&r2=210870&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Thu Jun 12 21:21:58 2014
@@ -1962,6 +1962,72 @@ bool X86FastISel::X86VisitIntrinsicCall(
UpdateValueMap(&I, ResultReg, 2);
return true;
}
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ bool IsInputDouble;
+ switch (I.getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic.");
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ if (!Subtarget->hasSSE1())
+ return false;
+ IsInputDouble = false;
+ break;
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (!Subtarget->hasSSE2())
+ return false;
+ IsInputDouble = true;
+ break;
+ }
+
+ Type *RetTy = I.getCalledFunction()->getReturnType();
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ static const unsigned CvtOpc[2][2][2] = {
+ { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr },
+ { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } },
+ { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr },
+ { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } }
+ };
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected result type.");
+ case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
+ case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
+ }
+
+ // Check if we can fold insertelement instructions into the convert.
+ const Value *Op = I.getArgOperand(0);
+ while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
+ const Value *Index = IE->getOperand(2);
+ if (!isa<ConstantInt>(Index))
+ break;
+ unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+
+ if (Idx == 0) {
+ Op = IE->getOperand(1);
+ break;
+ }
+ Op = IE->getOperand(0);
+ }
+
+ unsigned Reg = getRegForValue(Op);
+ if (Reg == 0)
+ return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(Reg);
+
+ UpdateValueMap(&I, ResultReg);
+ return true;
+ }
}
}
Added: llvm/trunk/test/CodeGen/X86/fast-isel-sse12-fptoint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-sse12-fptoint.ll?rev=210870&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-sse12-fptoint.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-sse12-fptoint.ll Thu Jun 12 21:21:58 2014
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX
+
+define i32 @cvt_test1(float %a) {
+; SSE-LABEL: cvt_test1
+; SSE: cvttss2si %xmm0, %eax
+; AVX-LABEL: cvt_test1
+; AVX: vcvttss2si %xmm0, %eax
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 0.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 0.000000e+00, i32 3
+ %5 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %4)
+ ret i32 %5
+}
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+
+define i64 @cvt_test2(float %a) {
+; SSE-LABEL: cvt_test2
+; SSE: cvttss2si %xmm0, %rax
+; AVX-LABEL: cvt_test2
+; AVX: vcvttss2si %xmm0, %rax
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 0.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 0.000000e+00, i32 3
+ %5 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %4)
+ ret i64 %5
+}
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+
+define i32 @cvt_test3(double %a) {
+; SSE-LABEL: cvt_test3
+; SSE: cvttsd2si %xmm0, %eax
+; AVX-LABEL: cvt_test3
+; AVX: vcvttsd2si %xmm0, %eax
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 0.000000e+00, i32 1
+ %3 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %2)
+ ret i32 %3
+}
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+
+define i64 @cvt_test4(double %a) {
+; SSE-LABEL: cvt_test4
+; SSE: cvttsd2si %xmm0, %rax
+; AVX-LABEL: cvt_test4
+; AVX: vcvttsd2si %xmm0, %rax
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 0.000000e+00, i32 1
+ %3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %2)
+ ret i64 %3
+}
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
More information about the llvm-commits
mailing list