[PATCH] D62710: [X86] Disable f32->f64 extload when sse2 is enabled
Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 9 21:34:26 PDT 2019
This revision was automatically updated to reflect the committed changes.
Closed by commit rL362919: [X86] Disable f32->f64 extload when sse2 is enabled (authored by ctopper).
Changed prior to commit:
https://reviews.llvm.org/D62710?vs=202341&id=203767#toc
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D62710/new/
https://reviews.llvm.org/D62710
Files:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -534,6 +534,12 @@
addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
: &X86::FR64RegClass);
+ // Disable f32->f64 extload as we can only generate this in one instruction
+ // under optsize. So it's easier to pattern match (fpext (load)) for that
+ // case instead of needing to emit 2 instructions for extload in the
+ // non-optsize case.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
for (auto VT : { MVT::f32, MVT::f64 }) {
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS, VT, Custom);
Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td
@@ -7521,14 +7521,6 @@
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
-def : Pat<(f64 (extloadf32 addr:$src)),
- (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX512, OptForSize]>;
-
-def : Pat<(f64 (extloadf32 addr:$src)),
- (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
- Requires<[HasAVX512, OptForSpeed]>;
-
def : Pat<(f32 (fpround FR64X:$src)),
(VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Requires<[HasAVX512]>;
Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td
@@ -1251,13 +1251,6 @@
def : Pat<(fpextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[UseAVX, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
- Requires<[UseAVX, OptForSpeed]>;
-
let isCodeGenOnly = 1 in {
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -1265,21 +1258,11 @@
XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (extloadf32 addr:$src))]>,
+ [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
XS, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSS2SD.Folded]>;
} // isCodeGenOnly = 1
-// extload f32 -> f64. This matches load+fpextend because we have a hack in
-// the isel (PreprocessForFPConvert) that can introduce loads after dag
-// combine.
-// Since these loads aren't folded into the fpextend, we have to match it
-// explicitly here.
-def : Pat<(fpextend (loadf32 addr:$src)),
- (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
-
let hasSideEffects = 0 in {
def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D62710.203767.patch
Type: text/x-patch
Size: 3552 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190610/5b91cb33/attachment.bin>
More information about the llvm-commits mailing list