[llvm] r357155 - [X86][AVX] Add missing vXi16 broadcast fold patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 28 03:25:13 PDT 2019
Author: rksimon
Date: Thu Mar 28 03:25:13 2019
New Revision: 357155
URL: http://llvm.org/viewvc/llvm-project?rev=357155&view=rev
Log:
[X86][AVX] Add missing vXi16 broadcast fold patterns
Now that D59484 has landed, it's easier to add these.
Added missing AVX512BW v32i16 equivalents while I was at it.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=357155&r1=357154&r2=357155&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Mar 28 03:25:13 2019
@@ -1394,12 +1394,30 @@ let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
(VPBROADCASTWZ256m addr:$src)>;
def : Pat<(v8i16 (X86VBroadcast
+ (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+ (VPBROADCASTWZ128m addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWZ128m addr:$src)>;
def : Pat<(v16i16 (X86VBroadcast
+ (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+ (VPBROADCASTWZ256m addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWZ256m addr:$src)>;
}
+let Predicates = [HasBWI] in {
+ // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
+ // This means we'll encounter truncated i32 loads; match that here.
+ def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+ (VPBROADCASTWZm addr:$src)>;
+ def : Pat<(v32i16 (X86VBroadcast
+ (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+ (VPBROADCASTWZm addr:$src)>;
+ def : Pat<(v32i16 (X86VBroadcast
+ (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+ (VPBROADCASTWZm addr:$src)>;
+}
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=357155&r1=357154&r2=357155&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Mar 28 03:25:13 2019
@@ -7955,9 +7955,15 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBW
def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
(VPBROADCASTWYrm addr:$src)>;
def : Pat<(v8i16 (X86VBroadcast
+ (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWrm addr:$src)>;
def : Pat<(v16i16 (X86VBroadcast
+ (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+ (VPBROADCASTWYrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWYrm addr:$src)>;
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=357155&r1=357154&r2=357155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Thu Mar 28 03:25:13 2019
@@ -2666,18 +2666,10 @@ define <8 x i16> @insert_dup_mem_v8i16_s
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movzwl (%rdi), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: movzwl (%rdi), %eax
-; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
+; AVX2OR512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=357155&r1=357154&r2=357155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Thu Mar 28 03:25:13 2019
@@ -4729,18 +4729,10 @@ define <16 x i16> @insert_dup_mem_v16i16
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movzwl (%rdi), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: movzwl (%rdi), %eax
-; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %ymm0
+; AVX2OR512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll?rev=357155&r1=357154&r2=357155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll Thu Mar 28 03:25:13 2019
@@ -220,8 +220,7 @@ define <32 x i16> @insert_dup_mem_v32i16
;
; SKX-LABEL: insert_dup_mem_v32i16_i32:
; SKX: ## %bb.0:
-; SKX-NEXT: movl (%rdi), %eax
-; SKX-NEXT: vpbroadcastw %eax, %zmm0
+; SKX-NEXT: vpbroadcastw (%rdi), %zmm0
; SKX-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -233,16 +232,13 @@ define <32 x i16> @insert_dup_mem_v32i16
define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
; KNL-LABEL: insert_dup_mem_v32i16_sext_i16:
; KNL: ## %bb.0:
-; KNL-NEXT: movzwl (%rdi), %eax
-; KNL-NEXT: vmovd %eax, %xmm0
-; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
+; KNL-NEXT: vpbroadcastw (%rdi), %ymm0
; KNL-NEXT: vmovdqa %ymm0, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
; SKX: ## %bb.0:
-; SKX-NEXT: movzwl (%rdi), %eax
-; SKX-NEXT: vpbroadcastw %eax, %zmm0
+; SKX-NEXT: vpbroadcastw (%rdi), %zmm0
; SKX-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
@@ -261,8 +257,7 @@ define <32 x i16> @insert_dup_elt1_mem_v
;
; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32:
; SKX: ## %bb.0:
-; SKX-NEXT: movzwl 2(%rdi), %eax
-; SKX-NEXT: vpbroadcastw %eax, %zmm0
+; SKX-NEXT: vpbroadcastw 2(%rdi), %zmm0
; SKX-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -280,8 +275,7 @@ define <32 x i16> @insert_dup_elt3_mem_v
;
; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32:
; SKX: ## %bb.0:
-; SKX-NEXT: movzwl 2(%rdi), %eax
-; SKX-NEXT: vpbroadcastw %eax, %zmm0
+; SKX-NEXT: vpbroadcastw 2(%rdi), %zmm0
; SKX-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
More information about the llvm-commits
mailing list