[llvm] d41ea65 - [X86] Add DAG combines to enable removal of the movddup/vbroadcast + simple_load isel patterns.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 15:22:21 PST 2020
Author: Craig Topper
Date: 2020-03-07T15:22:02-08:00
New Revision: d41ea65ee8e964953a324b394bcb8279af7510fd
URL: https://github.com/llvm/llvm-project/commit/d41ea65ee8e964953a324b394bcb8279af7510fd
DIFF: https://github.com/llvm/llvm-project/commit/d41ea65ee8e964953a324b394bcb8279af7510fd.diff
LOG: [X86] Add DAG combines to enable removal of the movddup/vbroadcast + simple_load isel patterns.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e65a29d8f90e..ff3739da2dca 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35074,6 +35074,31 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return R;
switch (Opcode) {
+ case X86ISD::MOVDDUP: {
+ SDValue Src = N.getOperand(0);
+ // Turn a 128-bit MOVDDUP of a full vector load into movddup+vzload.
+ if (VT == MVT::v2f64 && Src.hasOneUse() &&
+ ISD::isNormalLoad(Src.getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(Src);
+ // Unless the load is volatile or atomic.
+ if (LN->isSimple()) {
+ SDVTList Tys = DAG.getVTList(MVT::v2f64, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue VZLoad =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::f64,
+ LN->getPointerInfo(),
+ LN->getAlignment(),
+ LN->getMemOperand()->getFlags());
+ SDValue Movddup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, VZLoad);
+ DCI.CombineTo(N.getNode(), Movddup);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return N; // Return N so it doesn't get rechecked!
+ }
+ }
+
+ return SDValue();
+ }
case X86ISD::VBROADCAST: {
SDValue Src = N.getOperand(0);
SDValue BC = peekThroughBitcasts(Src);
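
The MOVDDUP combine above narrows a full 16-byte load feeding a 128-bit
movddup into an 8-byte X86ISD::VZEXT_LOAD: movddup only reads the low 64
bits of its source, so the upper half of the original load is dead. Once
the DAG carries the vzload form, the existing X86vzload64 isel patterns
fold the load into the instruction, which is what makes the simple_load
patterns removed further down unnecessary. A minimal intrinsics sketch of
the kind of input that should now select the memory form of movddup
(function and variable names are illustrative, not from the commit):

    #include <immintrin.h>

    // The 16-byte load feeds only a movddup, which reads just the low
    // 64 bits; after the combine the backend can emit a single
    //   vmovddup (%rdi), %xmm0
    // instead of a full vector load followed by a register movddup.
    __m128d dup_low(const double *p) {
      __m128d v = _mm_loadu_pd(p);   // v2f64 load
      return _mm_movedup_pd(v);      // duplicate element 0
    }
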
@@ -35162,6 +35187,26 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
}
+ // vbroadcast(vector load X) -> vbroadcast_load
+ if (SrcVT == MVT::v2f64 && Src.hasOneUse() &&
+ ISD::isNormalLoad(Src.getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(Src);
+ // Unless the load is volatile or atomic.
+ if (LN->isSimple()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
+ MVT::f64, LN->getPointerInfo(),
+ LN->getAlignment(),
+ LN->getMemOperand()->getFlags());
+ DCI.CombineTo(N.getNode(), BcastLd);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return N; // Return N so it doesn't get rechecked!
+ }
+ }
+
return SDValue();
}
case X86ISD::BLENDI: {
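
The VBROADCAST combine is the same idea one level up: a vbroadcast whose
source is a plain v2f64 load is rewritten into an X86ISD::VBROADCAST_LOAD
of the f64 element, so instruction selection sees a broadcast-from-memory
node directly and the load folds without dedicated simple_load patterns.
A hedged sketch of a triggering case (assuming AVX2; names are
illustrative):

    #include <immintrin.h>

    // Broadcasting element 0 of a loaded v2f64 only needs the low
    // 64 bits, so this should now fold to
    //   vbroadcastsd (%rdi), %ymm0
    __m256d splat_low(const double *p) {
      __m128d v = _mm_loadu_pd(p);
      return _mm256_broadcastsd_pd(v);   // broadcast element 0
    }
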
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index f5e06e4b02e6..a080f2f7cb18 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10858,8 +10858,6 @@ defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(v2f64 VR128X:$src0)),
@@ -10875,13 +10873,6 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
immAllZerosV),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
-
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- (v2f64 VR128X:$src0)),
- (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- immAllZerosV),
- (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
//===----------------------------------------------------------------------===//
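
In X86InstrAVX512.td, both the unmasked and the masked simple_load
broadcast patterns can go: the new combine canonicalizes
X86VBroadcast-of-load to X86VBroadcastld64 before instruction selection,
and the X86VBroadcastld64 masked patterns kept just above already select
VMOVDDUPZ128rmk/VMOVDDUPZ128rmkz. A speculative sketch of a masked case
this should still cover (assuming AVX-512VL and the usual lowering of the
masked movedup intrinsic; not taken from the commit's tests):

    #include <immintrin.h>

    // With a zeroing mask, the folded form should still be selected,
    // e.g. vmovddup (%rdi), %xmm0 {%k1} {z}
    __m128d maskz_dup(__mmask8 k, const double *p) {
      return _mm_maskz_movedup_pd(k, _mm_loadu_pd(p));
    }
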
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 9659145a495c..4c87963296d0 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4476,16 +4476,11 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}
let Predicates = [UseSSE3] in {
- // No need for aligned memory as this only loads 64-bits.
- def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
- (MOVDDUPrm addr:$src)>;
def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(MOVDDUPrm addr:$src)>;
}
@@ -7612,8 +7607,6 @@ let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
(VMOVDDUPrr VR128:$src)>;
- def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- (VMOVDDUPrm addr:$src)>;
}
let Predicates = [HasAVX1Only] in {
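
The X86InstrSSE.td removals follow the same logic on the non-VLX paths.
Note that the deleted SSE3 comment ("No need for aligned memory as this
only loads 64-bits") still holds in the new scheme: the combine shrinks
the load to 64 bits before isel, so the memory form of movddup never
requires 16-byte alignment. A quick sketch for a plain SSE3 target
(illustrative):

    #include <immintrin.h>

    // Compiled for SSE3 without AVX, this should still fold to
    //   movddup (%rdi), %xmm0
    // even though the source load is unaligned.
    __m128d dup_sse3(const double *p) {
      return _mm_movedup_pd(_mm_loadu_pd(p));
    }
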
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 9a759e35feee..50a250ba1adf 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX2,X86-AVX2
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,AVX512,X86-AVX512
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX2,X64-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512,X64-AVX512
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX2,X86-AVX2
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,AVX512,X86-AVX512
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX2,X64-AVX2
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512,X64-AVX512
;
; Combine tests involving AVX target shuffles
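
The RUN lines gain -disable-peephole so the checks exercise the new DAG
combines rather than the post-isel peephole pass: the peephole optimizer
can also fold loads into instructions, which would hide whether isel
itself produced the memory form. With it disabled, any folded
movddup/vbroadcast in the output must have come from the combine.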