[PATCH] D27661: [X86][AVX512] Add missing patterns for broadcast fallback in case load node has multiple uses (for v4i64 and v4f64).

Ayman Musa via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 11 13:38:49 PST 2016


aymanmus created this revision.
aymanmus added reviewers: mkuper, zvi, spatel, RKSimon.
aymanmus added a subscriber: llvm-commits.

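When the load node that feeds a broadcast instruction has multiple uses, the load cannot be folded into the broadcast.
This patch adds fallback patterns (for v4i64 and v4f64 sources) that catch these cases by broadcasting from a register instead: the 256-bit value is placed in the low half of a 512-bit register and inserted again into the upper half with VINSERTI64x4/VINSERTF64x4.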


https://reviews.llvm.org/D27661

Files:
  lib/Target/X86/X86InstrAVX512.td
  test/CodeGen/X86/pr31306.ll


Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -1079,6 +1079,12 @@
 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
           (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                            (v8f32 VR256X:$src), 1)>;
+def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
+          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+                           (v4f64 VR256X:$src), 1)>;
+def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
+          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+                           (v4i64 VR256X:$src), 1)>;
 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
           (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                            (v8i32 VR256X:$src), 1)>;
Index: test/CodeGen/X86/pr31306.ll
===================================================================
--- test/CodeGen/X86/pr31306.ll
+++ test/CodeGen/X86/pr31306.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -O2 -mattr=avx512f | FileCheck %s
+
+@ga = global <4 x i64> zeroinitializer, align 8
+@gb = global <8 x i64> zeroinitializer, align 8
+
+define void @blabla(<4 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: blabla:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,3,4]
+; CHECK-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vinserti64x4 $1, %ymm2, %zmm2, %zmm2
+; CHECK-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vpandq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vmovdqu %ymm0, {{.*}}(%rip)
+; CHECK-NEXT:    vmovdqu64 %zmm1, {{.*}}(%rip)
+; CHECK-NEXT:    retq
+entry:
+  %0 = add <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
+  %1 = add <8 x i64> %b, <i64 1, i64 2, i64 3, i64 4, i64 1, i64 2, i64 3, i64 4>
+  %2 = and <8 x i64> %1, <i64 1, i64 2, i64 3, i64 4, i64 1, i64 2, i64 3, i64 4>
+  store <4 x i64> %0, <4 x i64>* @ga, align 8
+  store <8 x i64> %2, <8 x i64>* @gb, align 8
+  ret void
+}
+
+
+
+@ga2 = global <4 x double> zeroinitializer, align 8
+@gb2 = global <8 x double> zeroinitializer, align 8
+
+define void @blabla2(<4 x double> %a, <8 x double> %b) {
+; CHECK-LABEL: blabla2:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vmovapd {{.*#+}} ymm2 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
+; CHECK-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vinsertf64x4 $1, %ymm2, %zmm2, %zmm2
+; CHECK-NEXT:    vaddpd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vdivpd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vmovupd %ymm0, {{.*}}(%rip)
+; CHECK-NEXT:    vmovupd %zmm1, {{.*}}(%rip)
+; CHECK-NEXT:    retq
+entry:
+  %0 = fadd <4 x double> %a, <double 1.0, double 2.0, double 3.0, double 4.0>
+  %1 = fadd <8 x double> %b, <double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0, double 4.0>
+  %2 = fdiv <8 x double> %1, <double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0, double 4.0>
+  store <4 x double> %0, <4 x double>* @ga2, align 8
+  store <8 x double> %2, <8 x double>* @gb2, align 8
+  ret void
+}


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D27661.81018.patch
Type: text/x-patch
Size: 3340 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161211/28d09c27/attachment-0001.bin>


More information about the llvm-commits mailing list