[llvm] 7d0ea3f - [X86] combineConcatVectorOps - fold concat(movddup(x),movddup(y)) -> movddup(concat(x,y))

Fri Jan 14 06:51:44 PST 2022

Author: Simon Pilgrim
Date: 2022-01-14T14:49:57Z
New Revision: 7d0ea3f41aa729ce70a413a94867836b73881d2b

URL: https://github.com/llvm/llvm-project/commit/7d0ea3f41aa729ce70a413a94867836b73881d2b
DIFF: https://github.com/llvm/llvm-project/commit/7d0ea3f41aa729ce70a413a94867836b73881d2b.diff

LOG: [X86] combineConcatVectorOps - fold concat(movddup(x),movddup(y)) -> movddup(concat(x,y))

For AVX2+ targets this requires us to also recognise v4f64 concat(broadcast(x),broadcast(y)) -> movddup(concat(x,y))

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 27ce65b1ebfdb..5258484d8663b 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52427,6 +52427,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
 
     unsigned NumOps = Ops.size();
     switch (Op0.getOpcode()) {
+    case X86ISD::VBROADCAST: {
+      if (!IsSplat && VT == MVT::v4f64 && llvm::all_of(Ops, [Op0](SDValue Op) {
+            return Op.getOperand(0).getValueType().is128BitVector();
+          }))
+        return DAG.getNode(X86ISD::MOVDDUP, DL, VT,
+                           ConcatSubOperand(VT, Ops, 0));
+      break;
+    }
+    case X86ISD::MOVDDUP: {
+      if (!IsSplat)
+        return DAG.getNode(Op0.getOpcode(), DL, VT,
+                           ConcatSubOperand(VT, Ops, 0));
+      break;
+    }
     case X86ISD::SHUFP: {
       // Add SHUFPD support if/when necessary.
       if (!IsSplat && VT.getScalarType() == MVT::f32 &&

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 31032f68314bb..3af2605f0de01 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -729,9 +729,9 @@ define <4 x double> @shuffle_v4f64_0044(<4 x double> %a, <4 x double> %b) {
 define <4 x double> @shuffle_v4f64_0044_v2f64(<2 x double> %a, <2 x double> %b) {
 ; ALL-LABEL: shuffle_v4f64_0044_v2f64:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; ALL-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; ALL-NEXT:    retq
   %1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   %2 = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> <i32 0, i32 0>