[llvm] ef4f939 - [X86] Remove isel patterns for (X86VBroadcast (i16 (trunc (i32 (load))))). Replace with a DAG combine to form VBROADCAST_LOAD.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 00:19:28 PDT 2020
Author: Craig Topper
Date: 2020-03-10T00:07:07-07:00
New Revision: ef4f939d389e2abe6fdce45bd49cc71237429cb6
URL: https://github.com/llvm/llvm-project/commit/ef4f939d389e2abe6fdce45bd49cc71237429cb6
DIFF: https://github.com/llvm/llvm-project/commit/ef4f939d389e2abe6fdce45bd49cc71237429cb6.diff
LOG: [X86] Remove isel patterns for (X86VBroadcast (i16 (trunc (i32 (load))))). Replace with a DAG combine to form VBROADCAST_LOAD.
isTypeDesirableForOp prevents loads from being shrunk to i16 by DAG
combine. Because of this, we can't just match a broadcast of a
scalar i16 load. Instead, look for broadcast+truncate+load and form a
VBROADCAST_LOAD during DAG combine. This replaces what was
previously done as an isel pattern and, I think, also fixes things so
that we won't change the size of a volatile load. But my main
motivation is just to clean up our isel patterns.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fb8156b9ac45..ab677a097db4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35171,6 +35171,28 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return N; // Return N so it doesn't get rechecked!
}
+ // Due to isTypeDesirableForOp, we won't always shrink a load truncated to
+ // i16. So shrink it ourselves if we can make a broadcast_load.
+ if (SrcVT == MVT::i16 && Src.getOpcode() == ISD::TRUNCATE &&
+ Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
+ Src.getOperand(0).hasOneUse()) {
+ assert(Subtarget.hasAVX2() && "Expected AVX2");
+ LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0));
+ if (LN->isSimple()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
+ MVT::i16, LN->getPointerInfo(),
+ LN->getAlignment(),
+ LN->getMemOperand()->getFlags());
+ DCI.CombineTo(N.getNode(), BcastLd);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return N; // Return N so it doesn't get rechecked!
+ }
+ }
+
// vbroadcast(vzload X) -> vbroadcast_load X
if (Src.getOpcode() == X86ISD::VZEXT_LOAD && Src.hasOneUse()) {
MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 9c4f6ae1c5ca..2af3cf89d77d 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1426,10 +1426,6 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasBWI] in {
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
- def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWZ128rm addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWZ256rm addr:$src)>;
def : Pat<(v8i16 (X86VBroadcast
(i16 (trunc (i32 (extloadi16 addr:$src)))))),
(VPBROADCASTWZ128rm addr:$src)>;
@@ -1446,8 +1442,6 @@ let Predicates = [HasVLX, HasBWI] in {
let Predicates = [HasBWI] in {
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
- def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWZrm addr:$src)>;
def : Pat<(v32i16 (X86VBroadcast
(i16 (trunc (i32 (extloadi16 addr:$src)))))),
(VPBROADCASTWZrm addr:$src)>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index ad24838ada92..04ca5f07c378 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7517,10 +7517,6 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastl
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
- def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWrm addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWYrm addr:$src)>;
def : Pat<(v8i16 (X86VBroadcast
(i16 (trunc (i32 (extloadi16 addr:$src)))))),
(VPBROADCASTWrm addr:$src)>;
More information about the llvm-commits
mailing list