[llvm] 228a2bc - [X86] Teach combineCVTPH2PS to shrink v8i16 loads when the output type is v4f32. Remove extra isel patterns.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 18:12:20 PST 2020
Author: Craig Topper
Date: 2020-02-21T18:11:07-08:00
New Revision: 228a2bc9b70c3d93bd28f0038a8664ef8dac042e
URL: https://github.com/llvm/llvm-project/commit/228a2bc9b70c3d93bd28f0038a8664ef8dac042e
DIFF: https://github.com/llvm/llvm-project/commit/228a2bc9b70c3d93bd28f0038a8664ef8dac042e.diff
LOG: [X86] Teach combineCVTPH2PS to shrink v8i16 loads when the output type is v4f32. Remove extra isel patterns.
This is similar to what we do for other operations that use only a subset of the bits.
It allows us to remove an isel pattern that shrank the load, which was
incorrect if the load was volatile.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 64cae53b0d40..a7e4bc740147 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43728,6 +43728,26 @@ static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG,
if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
DCI))
return SDValue(N, 0);
+
+ if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
+ LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+ // Unless the load is volatile or atomic.
+ if (LN->isSimple()) {
+ SDLoc dl(N);
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue VZLoad =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MVT::i64,
+ LN->getPointerInfo(),
+ LN->getAlignment(),
+ LN->getMemOperand()->getFlags());
+ SDValue Convert = DAG.getNode(N->getOpcode(), dl, MVT::v4f32,
+ DAG.getBitcast(MVT::v8i16, VZLoad));
+ DCI.CombineTo(N, Convert);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ return SDValue(N, 0);
+ }
+ }
}
return SDValue();
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index de3fe20aedcf..09d1ea66f84f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8566,7 +8566,7 @@ let Predicates = [HasDQI, HasVLX] in {
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
- X86MemOperand x86memop, PatFrag ld_frag,
+ X86MemOperand x86memop, dag ld_dag,
X86FoldableSchedWrite sched> {
defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
(ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
@@ -8575,8 +8575,8 @@ multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
T8PD, Sched<[sched]>;
defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
(ins x86memop:$src), "vcvtph2ps", "$src", "$src",
- (X86any_cvtph2ps (_src.VT (ld_frag addr:$src))),
- (X86cvtph2ps (_src.VT (ld_frag addr:$src)))>,
+ (X86any_cvtph2ps (_src.VT ld_dag)),
+ (X86cvtph2ps (_src.VT ld_dag))>,
T8PD, Sched<[sched.Folded]>;
}
@@ -8591,22 +8591,21 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
}
let Predicates = [HasAVX512] in
- defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
- WriteCvtPH2PSZ>,
+ defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
+ (load addr:$src), WriteCvtPH2PSZ>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
- load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
+ (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
- load, WriteCvtPH2PS>, EVEX, EVEX_V128,
+ (bitconvert (v2i64 (X86vzload64 addr:$src))),
+ WriteCvtPH2PS>, EVEX, EVEX_V128,
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
- (VCVTPH2PSZ128rm addr:$src)>;
def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
(v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
(VCVTPH2PSZ128rm addr:$src)>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index a8c285cfe5f5..abbd513ce418 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7339,10 +7339,10 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
"vcvtph2ps\t{$src, $dst|$dst, $src}",
[(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
T8PD, VEX, Sched<[sched]>;
+ let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- [(set RC:$dst, (X86any_cvtph2ps (loadv8i16 addr:$src)))]>,
- T8PD, VEX, Sched<[sched.Folded]>;
+ []>, T8PD, VEX, Sched<[sched.Folded]>;
}
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
@@ -7373,6 +7373,8 @@ let Predicates = [HasF16C, NoVLX] in {
def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16
(v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(VCVTPH2PSrm addr:$src)>;
+ def : Pat<(v8f32 (X86any_cvtph2ps (loadv8i16 addr:$src))),
+ (VCVTPH2PSYrm addr:$src)>;
def : Pat<(store (f64 (extractelt
(bc_v2f64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
More information about the llvm-commits
mailing list