[llvm] r270355 - [AVX512] Add an AddedComplexity line to the 512-bit insert_subvector undef index 0 patterns. This gives them higher priority than the memory patterns. This matches AVX1/2.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun May 22 00:40:44 PDT 2016
Author: ctopper
Date: Sun May 22 02:40:40 2016
New Revision: 270355
URL: http://llvm.org/viewvc/llvm-project?rev=270355&view=rev
Log:
[AVX512] Add an AddedComplexity line to the 512-bit insert_subvector undef index 0 patterns. This gives them higher priority than the memory patterns. This matches AVX1/2.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=270355&r1=270354&r2=270355&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun May 22 02:40:40 2016
@@ -796,6 +796,7 @@ defm : vextract_for_size_lowering<"VEXTR
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
+let AddedComplexity = 25 in { // to give priority over vinsertf128rm
def : Pat<(v8i64 (insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v8f64 (insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0))),
@@ -821,6 +822,7 @@ def : Pat<(v32i16 (insert_subvector unde
(INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v64i8 (insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
+}
// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll?rev=270355&r1=270354&r2=270355&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll Sun May 22 02:40:40 2016
@@ -8,17 +8,17 @@
define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_12u4:
; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $0, 16(%rdi), %zmm0, %zmm1
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovupd 16(%rdi), %ymm0
+; ALL-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
+; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vinsertf128 $1, 64(%eax), %ymm0, %ymm0
-; X32-AVX512F-NEXT: vinsertf64x4 $0, 16(%eax), %zmm0, %zmm1
-; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-AVX512F-NEXT: vmovupd 16(%eax), %ymm0
+; X32-AVX512F-NEXT: vinsertf128 $1, 64(%eax), %ymm0, %ymm1
+; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
%ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
@@ -35,19 +35,19 @@ define <8 x double> @merge_8f64_2f64_12u
define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_23z5:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; ALL-NEXT: vinsertf128 $1, 80(%rdi), %ymm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $0, 32(%rdi), %zmm0, %zmm1
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovupd 32(%rdi), %ymm0
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
+; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; X32-AVX512F-NEXT: vinsertf128 $1, 80(%eax), %ymm0, %ymm0
-; X32-AVX512F-NEXT: vinsertf64x4 $0, 32(%eax), %zmm0, %zmm1
-; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-AVX512F-NEXT: vmovupd 32(%eax), %ymm0
+; X32-AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT: vinsertf128 $1, 80(%eax), %ymm1, %ymm1
+; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
%ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
More information about the llvm-commits mailing list