[llvm] 70e4fb8 - [X86] Add DAG combine to turn (vzext_movl (vbroadcast_load)) -> vzext_load.

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Sun Mar 8 00:36:05 PST 2020


Author: Craig Topper
Date: 2020-03-08T00:35:40-08:00
New Revision: 70e4fb8a53984ff57b503cf13706c7e6683d650e

URL: https://github.com/llvm/llvm-project/commit/70e4fb8a53984ff57b503cf13706c7e6683d650e
DIFF: https://github.com/llvm/llvm-project/commit/70e4fb8a53984ff57b503cf13706c7e6683d650e.diff

LOG: [X86] Add DAG combine to turn (vzext_movl (vbroadcast_load)) -> vzext_load.

If we're zeroing the other elements anyway, we don't need the broadcast.
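In DAG terms, X86ISD::VBROADCAST_LOAD loads a scalar from memory and splats it into every vector lane, while X86ISD::VZEXT_MOVL keeps lane 0 and zeroes the remaining lanes, so the composition is equivalent to X86ISD::VZEXT_LOAD, which loads the scalar into lane 0 and zeroes everything above it. A toy C++ model of that equivalence (illustrative only, not LLVM code; the helper names and the <4 x i64> lane width are made up for the example):

// Toy model of the fold: broadcasting a loaded scalar and then zeroing every
// lane but lane 0 gives the same vector as a zero-extending scalar load.
#include <array>
#include <cassert>
#include <cstdint>

using V4i64 = std::array<uint64_t, 4>;   // stand-in for a <4 x i64> vector

static V4i64 vbroadcast_load(const uint64_t *p) {
  return {*p, *p, *p, *p};               // splat the loaded scalar to all lanes
}

static V4i64 vzext_movl(const V4i64 &v) {
  return {v[0], 0, 0, 0};                // keep lane 0, zero the upper lanes
}

static V4i64 vzext_load(const uint64_t *p) {
  return {*p, 0, 0, 0};                  // load scalar into lane 0, zero the rest
}

int main() {
  uint64_t mem = 0x1122334455667788ULL;
  assert(vzext_movl(vbroadcast_load(&mem)) == vzext_load(&mem));
  return 0;
}

Note that the combine only fires when the scalar size of the result type matches the broadcast's memory type, which is what the VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits() check in the patch below enforces.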

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-extend-inreg.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 11f9722914db..f2fe4a03a02b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35965,9 +35965,30 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
                                   VT.getVectorElementType(),
                                   LN->getPointerInfo(),
                                   LN->getAlignment(),
-                                  MachineMemOperand::MOLoad);
+                                  LN->getMemOperand()->getFlags());
+      DCI.CombineTo(N, VZLoad);
+      DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+      DCI.recursivelyDeleteUnusedNodes(LN);
+      return SDValue(N, 0);
+    }
+  }
+
+  // If this is a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the broadcast and
+  // can just use a VZEXT_LOAD.
+  // FIXME: Is there some way to do this with SimplifyDemandedVectorElts?
+  if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
+      N->getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD) {
+    auto *LN = cast<MemSDNode>(N->getOperand(0));
+    if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) {
+      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+      SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+      SDValue VZLoad =
+          DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                  LN->getMemoryVT(), LN->getMemOperand());
+      DCI.CombineTo(N, VZLoad);
       DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
-      return VZLoad;
+      DCI.recursivelyDeleteUnusedNodes(LN);
+      return SDValue(N, 0);
     }
   }
 

diff --git a/llvm/test/CodeGen/X86/vector-extend-inreg.ll b/llvm/test/CodeGen/X86/vector-extend-inreg.ll
index f60bf4b01095..98a35c4a7934 100644
--- a/llvm/test/CodeGen/X86/vector-extend-inreg.ll
+++ b/llvm/test/CodeGen/X86/vector-extend-inreg.ll
@@ -71,17 +71,16 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
 ; X32-AVX-NEXT:    andl $-128, %esp
 ; X32-AVX-NEXT:    subl $384, %esp # imm = 0x180
 ; X32-AVX-NEXT:    movl 40(%ebp), %ecx
-; X32-AVX-NEXT:    vpbroadcastq 32(%ebp), %ymm0
-; X32-AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, (%esp)
-; X32-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    leal (%ecx,%ecx), %eax
 ; X32-AVX-NEXT:    andl $31, %eax
 ; X32-AVX-NEXT:    movl 128(%esp,%eax,4), %eax


        

