[llvm-commits] [llvm] r163528 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_fpext.ll
Michael Liao
michael.liao at intel.com
Mon Sep 10 11:33:51 PDT 2012
Author: hliao
Date: Mon Sep 10 13:33:51 2012
New Revision: 163528
URL: http://llvm.org/viewvc/llvm-project?rev=163528&view=rev
Log:
Enhance PR11334 fix to support extload from v2f32/v4f32
- Fix an remaining issue of PR11674 as well
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/vec_fpext.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=163528&r1=163527&r2=163528&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Sep 10 13:33:51 2012
@@ -932,6 +932,8 @@
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
}
if (Subtarget->hasSSE41()) {
@@ -1043,6 +1045,8 @@
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
+
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=163528&r1=163527&r2=163528&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Mon Sep 10 13:33:51 2012
@@ -240,6 +240,10 @@
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+// 128-/256-bit extload pattern fragments
+def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
+def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
+
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=163528&r1=163527&r2=163528&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Sep 10 13:33:51 2012
@@ -2007,10 +2007,10 @@
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
-let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
@@ -2028,10 +2028,10 @@
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB;
-let neverHasSideEffects = 1, mayLoad = 1 in
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>, TB;
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB;
}
// Convert Packed DW Integers to Packed Double FP
@@ -2134,7 +2134,7 @@
(VCVTPS2PDrr VR128:$src)>;
def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
(VCVTPS2PDYrr VR128:$src)>;
- def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+ def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDYrm addr:$src)>;
}
Modified: llvm/trunk/test/CodeGen/X86/vec_fpext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fpext.ll?rev=163528&r1=163527&r2=163528&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fpext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fpext.ll Mon Sep 10 13:33:51 2012
@@ -1,14 +1,38 @@
; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s
; PR11674
define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
entry:
-; TODO: We should be able to generate cvtps2pd for the load.
-; For now, just check that we generate something sane.
-; CHECK: cvtss2sd
-; CHECK: cvtss2sd
+; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
%0 = load <2 x float>* %in, align 8
%1 = fpext <2 x float> %0 to <2 x double>
store <2 x double> %1, <2 x double>* %out, align 1
ret void
}
+
+define void @fpext_frommem4(<4 x float>* %in, <4 x double>* %out) {
+entry:
+; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
+ %0 = load <4 x float>* %in
+ %1 = fpext <4 x float> %0 to <4 x double>
+ store <4 x double> %1, <4 x double>* %out, align 1
+ ret void
+}
+
+define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) {
+entry:
+; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
+; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}}
+ %0 = load <8 x float>* %in
+ %1 = fpext <8 x float> %0 to <8 x double>
+ store <8 x double> %1, <8 x double>* %out, align 1
+ ret void
+}
More information about the llvm-commits
mailing list