[llvm] r332573 - [X86] Add OptForSize to a couple load folding patterns. Remove some bad FIXME comments.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed May 16 22:41:11 PDT 2018
Author: ctopper
Date: Wed May 16 22:41:11 2018
New Revision: 332573
URL: http://llvm.org/viewvc/llvm-project?rev=332573&view=rev
Log:
[X86] Add OptForSize to a couple load folding patterns. Remove some bad FIXME comments.
The FIXME comments were about preventing load folding in order to avoid a partial xmm register update. But these instructions take a GPR as their input when the load isn't folded, so leaving the load unfolded would not help prevent a partial xmm update.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=332573&r1=332572&r2=332573&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed May 16 22:41:11 2018
@@ -7194,7 +7194,7 @@ def : Pat<(f64 (fpextend FR32X:$src)),
Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX512]>;
+ Requires<[HasAVX512, OptForSize]>;
def : Pat<(f64 (extloadf32 addr:$src)),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=332573&r1=332572&r2=332573&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed May 16 22:41:11 2018
@@ -897,8 +897,6 @@ let Constraints = "$src1 = $dst", AddedC
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//
-// FIXME: We probably want to match the rm form only when optimizing for
-// size, to avoid false depenendecies (see sse_fp_unop_s for details)
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm, X86FoldableSchedWrite sched> {
@@ -925,8 +923,6 @@ let hasSideEffects = 0 in {
}
}
-// FIXME: We probably want to match the rm form only when optimizing for
-// size, to avoid false depenendecies (see sse_fp_unop_s for details)
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm,
X86FoldableSchedWrite sched> {
@@ -1301,24 +1297,25 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
-let hasSideEffects = 0, Predicates = [UseAVX] in {
+let hasSideEffects = 0 in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
- Sched<[WriteCvtSS2SD]>;
+ Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>,
+ Requires<[UseAVX, OptForSize]>;
}
def : Pat<(f64 (fpextend FR32:$src)),
(VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fpextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
+ (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
@@ -1343,7 +1340,7 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (out
// Since these loads aren't folded into the fpextend, we have to match it
// explicitly here.
def : Pat<(fpextend (loadf32 addr:$src)),
- (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
+ (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
More information about the llvm-commits mailing list