[PATCH] D55936: [x86] add load fold patterns for movddup with vzext_load

Sanjay Patel via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 20 09:17:27 PST 2018


spatel created this revision.
spatel added reviewers: RKSimon, craig.topper, andreadb.
Herald added a subscriber: mcrosier.

The missed load folding noticed in D55898 <https://reviews.llvm.org/D55898> is visible independently of that change, either with an adjusted IR pattern to start from or with AVX2 (where the build vector becomes a broadcast first; movddup is not produced until we reach isel via the tablegen patterns).
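For reference, the kind of IR that exposes the missed fold looks roughly like this (a hypothetical sketch, not the exact body of the movddup_load_fold test): a scalar load inserted into lane 0 of a zeroed vector and then splatted. The scalar-into-zero load lowers to a zero-extending 64-bit load (X86vzload), so without the new patterns the movddup could not fold it and we emitted a separate (v)movsd first.

```llvm
; Hypothetical sketch: load one double into lane 0 of a zero vector,
; then splat lane 0. Should now compile to a single
;   movddup (%mem), %xmm0
; instead of movsd-from-memory followed by a register movddup.
define <2 x double> @movddup_load_fold(double* %p) {
  %v = load double, double* %p
  %ins = insertelement <2 x double> zeroinitializer, double %v, i32 0
  %splat = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
```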


https://reviews.llvm.org/D55936

Files:
  lib/Target/X86/X86InstrSSE.td
  test/CodeGen/X86/build-vector-128.ll
  test/CodeGen/X86/vector-shuffle-combining-xop.ll


Index: test/CodeGen/X86/vector-shuffle-combining-xop.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -332,8 +332,7 @@
 ; X86AVX2-LABEL: buildvector_v4f32_0404:
 ; X86AVX2:       # %bb.0:
 ; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; X86AVX2-NEXT:    vmovapd %xmm0, (%eax)
 ; X86AVX2-NEXT:    retl
 ;
Index: test/CodeGen/X86/build-vector-128.ll
===================================================================
--- test/CodeGen/X86/build-vector-128.ll
+++ test/CodeGen/X86/build-vector-128.ll
@@ -523,8 +523,7 @@
 ;
 ; SSE41-32-LABEL: movddup_load_fold:
 ; SSE41-32:       # %bb.0:
-; SSE41-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE41-32-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-32-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
 ; SSE41-32-NEXT:    retl
 ;
 ; SSE41-64-LABEL: movddup_load_fold:
@@ -535,8 +534,7 @@
 ;
 ; AVX-32-LABEL: movddup_load_fold:
 ; AVX-32:       # %bb.0:
-; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX-32-NEXT:    retl
 ;
 ; AVX1-64-LABEL: movddup_load_fold:
@@ -604,8 +602,7 @@
 ;
 ; AVX2-32-LABEL: PR37502:
 ; AVX2-32:       # %bb.0:
-; AVX2-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX2-32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX2-32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX2-32-NEXT:    retl
 ;
 ; AVX2-64-LABEL: PR37502:
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -4669,12 +4669,16 @@
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(X86Movddup (loadv2f64 addr:$src)),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
 }
 
 let Predicates = [UseSSE3] in {
   // No need for aligned memory as this only loads 64-bits.
   def : Pat<(X86Movddup (loadv2f64 addr:$src)),
             (MOVDDUPrm addr:$src)>;
+  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+            (MOVDDUPrm addr:$src)>;
 }
 
 //===---------------------------------------------------------------------===//
@@ -8034,6 +8038,8 @@
             (VMOVDDUPrr VR128:$src)>;
   def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
             (VMOVDDUPrm addr:$src)>;
+  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+            (VMOVDDUPrm addr:$src)>;
 }
 
 let Predicates = [HasAVX1Only] in {

