[llvm] 70e4fb8 - [X86] Add DAG combine to turn (vzext_movl (vbroadcast_load)) -> vzext_load.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 00:36:05 PST 2020
Author: Craig Topper
Date: 2020-03-08T00:35:40-08:00
New Revision: 70e4fb8a53984ff57b503cf13706c7e6683d650e
URL: https://github.com/llvm/llvm-project/commit/70e4fb8a53984ff57b503cf13706c7e6683d650e
DIFF: https://github.com/llvm/llvm-project/commit/70e4fb8a53984ff57b503cf13706c7e6683d650e.diff
LOG: [X86] Add DAG combine to turn (vzext_movl (vbroadcast_load)) -> vzext_load.
If we're zeroing the other elements then we don't need the broadcast.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-extend-inreg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 11f9722914db..f2fe4a03a02b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35965,9 +35965,30 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
VT.getVectorElementType(),
LN->getPointerInfo(),
LN->getAlignment(),
- MachineMemOperand::MOLoad);
+ LN->getMemOperand()->getFlags());
+ DCI.CombineTo(N, VZLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return SDValue(N, 0);
+ }
+ }
+
+ // If this is a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the broadcast and
+ // can just use a VZEXT_LOAD.
+ // FIXME: Is there some way to do this with SimplifyDemandedVectorElts?
+ if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
+ N->getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD) {
+ auto *LN = cast<MemSDNode>(N->getOperand(0));
+ if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue VZLoad =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+ LN->getMemoryVT(), LN->getMemOperand());
+ DCI.CombineTo(N, VZLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
- return VZLoad;
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return SDValue(N, 0);
}
}
diff --git a/llvm/test/CodeGen/X86/vector-extend-inreg.ll b/llvm/test/CodeGen/X86/vector-extend-inreg.ll
index f60bf4b01095..98a35c4a7934 100644
--- a/llvm/test/CodeGen/X86/vector-extend-inreg.ll
+++ b/llvm/test/CodeGen/X86/vector-extend-inreg.ll
@@ -71,17 +71,16 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
; X32-AVX-NEXT: andl $-128, %esp
; X32-AVX-NEXT: subl $384, %esp # imm = 0x180
; X32-AVX-NEXT: movl 40(%ebp), %ecx
-; X32-AVX-NEXT: vpbroadcastq 32(%ebp), %ymm0
-; X32-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm1, (%esp)
-; X32-AVX-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: leal (%ecx,%ecx), %eax
; X32-AVX-NEXT: andl $31, %eax
; X32-AVX-NEXT: movl 128(%esp,%eax,4), %eax
More information about the llvm-commits mailing list