[llvm] r295733 - [X86][AVX2] Fix VPBROADCASTQ folding on 32-bit targets.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 21 08:41:45 PST 2017
Author: rksimon
Date: Tue Feb 21 10:41:44 2017
New Revision: 295733
URL: http://llvm.org/viewvc/llvm-project?rev=295733&view=rev
Log:
[X86][AVX2] Fix VPBROADCASTQ folding on 32-bit targets.
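Because i64 isn't a legal value type on 32-bit targets, a scalar i64 load cannot be matched directly there; the value is loaded via a VZEXT_LOAD instead, so we need patterns that fold that VZEXT_LOAD into VPBROADCASTQ.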
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=295733&r1=295732&r2=295733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Feb 21 10:41:44 2017
@@ -1030,7 +1030,18 @@ multiclass avx512_subvec_broadcast_rm<bi
AVX5128IBase, EVEX;
}
+let Predicates = [HasAVX512] in {
+ // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZm addr:$src)>;
+}
+
let Predicates = [HasVLX, HasBWI] in {
+ // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZ128m addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZ256m addr:$src)>;
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=295733&r1=295732&r2=295733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Feb 21 10:41:44 2017
@@ -8265,6 +8265,11 @@ defm VPBROADCASTQ : avx2_broadcast<0x59
v2i64, v4i64, NoVLX>;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+ // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQrm addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQYrm addr:$src)>;
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=295733&r1=295732&r2=295733&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Tue Feb 21 10:41:44 2017
@@ -803,8 +803,7 @@ define <32 x i8> @combine_unpack_unpack_
define <16 x i8> @combine_broadcast_pshufb_insertion_v2i64(i64 %a0) {
; X32-LABEL: combine_broadcast_pshufb_insertion_v2i64:
; X32: # BB#0:
-; X32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vpbroadcastq %xmm0, %xmm0
+; X32-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_broadcast_pshufb_insertion_v2i64:
@@ -821,8 +820,7 @@ define <16 x i8> @combine_broadcast_pshu
define <8 x i32> @combine_broadcast_permd_insertion_v4i64(i64 %a0) {
; X32-LABEL: combine_broadcast_permd_insertion_v4i64:
; X32: # BB#0:
-; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_broadcast_permd_insertion_v4i64:
More information about the llvm-commits mailing list