[PATCH] D27661: [X86][AVX512] Add missing patterns for broadcast fallback in case load node has multiple uses (for v4i64 and v4f64).

Ayman Musa via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 11 13:38:52 PST 2016


aymanmus updated this revision to Diff 81024.

https://reviews.llvm.org/D27661
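
For context (this sketch is not part of the patch; the function and value names are illustrative), the case the new patterns target looks roughly like the IR below: a <4 x i64> load with a second use cannot be folded into the memory form of VBROADCASTI64X4, so the broadcast has to be built from the already-loaded register instead, via VINSERTI64x4:

  define <8 x i64> @multi_use_subvector_load(<4 x i64>* %p, <4 x i64> %a) {
  entry:
    %v = load <4 x i64>, <4 x i64>* %p, align 8
    ; First use of %v: keeps the loaded value live in a ymm register.
    %sum = add <4 x i64> %v, %a
    store <4 x i64> %sum, <4 x i64>* %p, align 8
    ; Second use of %v: a subvector broadcast to 512 bits.
    %bcast = shufflevector <4 x i64> %v, <4 x i64> undef,
             <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    ret <8 x i64> %bcast
  }

With the added patterns, ISel can select the vinserti64x4/vinsertf64x4 register fallback for v4i64 and v4f64, as exercised by the tests below.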

Files:
  lib/Target/X86/X86InstrAVX512.td
  test/CodeGen/X86/subvector-broadcast.ll


Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -1079,6 +1079,12 @@
 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
           (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                            (v8f32 VR256X:$src), 1)>;
+def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
+          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+                           (v4f64 VR256X:$src), 1)>;
+def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
+          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+                           (v4i64 VR256X:$src), 1)>;
 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
           (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                            (v8i32 VR256X:$src), 1)>;
Index: test/CodeGen/X86/subvector-broadcast.ll
===================================================================
--- test/CodeGen/X86/subvector-broadcast.ll
+++ test/CodeGen/X86/subvector-broadcast.ll
@@ -1339,3 +1339,78 @@
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   ret <16 x i32> %2
 }
+
+
+;
+; Subvector load with multiple uses + broadcast: since the load cannot be
+; folded into the broadcast instruction, fall back to a subvector insert.
+;
+
+@ga4 = global <4 x i64> zeroinitializer, align 8
+@gb4 = global <8 x i64> zeroinitializer, align 8
+
+define void @fallback_broadcast_v4i64_to_v8i64(<4 x i64> %a, <8 x i64> %b) {
+; X32-AVX512-LABEL: fallback_broadcast_v4i64_to_v8i64:
+; X32-AVX512:       ## BB#0: ## %entry
+; X32-AVX512-NEXT:    vpaddq LCPI26_0, %ymm0, %ymm0
+; X32-AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,2,0,3,0,4,0,1,0,2,0,3,0,4,0]
+; X32-AVX512-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; X32-AVX512-NEXT:    vpandq %zmm2, %zmm1, %zmm1
+; X32-AVX512-NEXT:    vmovdqu64 %ymm0, _ga4
+; X32-AVX512-NEXT:    vmovdqu64 %zmm1, _gb4
+; X32-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: fallback_broadcast_v4i64_to_v8i64:
+; X64-AVX512:       ## BB#0: ## %entry
+; X64-AVX512-NEXT:    vmovdqa64 {{.*#+}} ymm2 = [1,2,3,4]
+; X64-AVX512-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vinserti64x4 $1, %ymm2, %zmm2, %zmm2
+; X64-AVX512-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; X64-AVX512-NEXT:    vpandq %zmm2, %zmm1, %zmm1
+; X64-AVX512-NEXT:    vmovdqu64 %ymm0, {{.*}}(%rip)
+; X64-AVX512-NEXT:    vmovdqu64 %zmm1, {{.*}}(%rip)
+; X64-AVX512-NEXT:    retq
+entry:
+  %0 = add <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
+  %1 = add <8 x i64> %b, <i64 1, i64 2, i64 3, i64 4, i64 1, i64 2, i64 3, i64 4>
+  %2 = and <8 x i64> %1, <i64 1, i64 2, i64 3, i64 4, i64 1, i64 2, i64 3, i64 4>
+  store <4 x i64> %0, <4 x i64>* @ga4, align 8
+  store <8 x i64> %2, <8 x i64>* @gb4, align 8
+  ret void
+}
+
+
+
+@ga2 = global <4 x double> zeroinitializer, align 8
+@gb2 = global <8 x double> zeroinitializer, align 8
+
+define void @fallback_broadcast_v4f64_to_v8f64(<4 x double> %a, <8 x double> %b) {
+; X32-AVX512-LABEL: fallback_broadcast_v4f64_to_v8f64:
+; X32-AVX512:       ## BB#0: ## %entry
+; X32-AVX512-NEXT:    vmovapd {{.*#+}} ymm2 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
+; X32-AVX512-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
+; X32-AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm2, %zmm2
+; X32-AVX512-NEXT:    vaddpd %zmm2, %zmm1, %zmm1
+; X32-AVX512-NEXT:    vdivpd %zmm2, %zmm1, %zmm1
+; X32-AVX512-NEXT:    vmovupd %ymm0, _ga2
+; X32-AVX512-NEXT:    vmovupd %zmm1, _gb2
+; X32-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: fallback_broadcast_v4f64_to_v8f64:
+; X64-AVX512:       ## BB#0: ## %entry
+; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm2 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
+; X64-AVX512-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm2, %zmm2
+; X64-AVX512-NEXT:    vaddpd %zmm2, %zmm1, %zmm1
+; X64-AVX512-NEXT:    vdivpd %zmm2, %zmm1, %zmm1
+; X64-AVX512-NEXT:    vmovupd %ymm0, {{.*}}(%rip)
+; X64-AVX512-NEXT:    vmovupd %zmm1, {{.*}}(%rip)
+; X64-AVX512-NEXT:    retq
+entry:
+  %0 = fadd <4 x double> %a, <double 1.0, double 2.0, double 3.0, double 4.0>
+  %1 = fadd <8 x double> %b, <double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0, double 4.0>
+  %2 = fdiv <8 x double> %1, <double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0, double 4.0>
+  store <4 x double> %0, <4 x double>* @ga2, align 8
+  store <8 x double> %2, <8 x double>* @gb2, align 8
+  ret void
+}

