[llvm] r315382 - [X86] Add broadcast patterns that allow a scalar_to_vector between the broadcast and the load.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 10 15:40:32 PDT 2017
Author: ctopper
Date: Tue Oct 10 15:40:31 2017
New Revision: 315382
URL: http://llvm.org/viewvc/llvm-project?rev=315382&view=rev
Log:
[X86] Add broadcast patterns that allow a scalar_to_vector between the broadcast and the load.
We already have these patterns for AVX512VL, but not for AVX1 or AVX2.
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=315382&r1=315381&r2=315382&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Oct 10 15:40:31 2017
@@ -7381,6 +7381,15 @@ let ExeDomain = SSEPackedDouble, Predica
def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
v4f64, v2f64, WriteFShuffle256>, VEX_L;
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(v4f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (VBROADCASTSSrm addr:$src)>;
+ def : Pat<(v8f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (VBROADCASTSSYrm addr:$src)>;
+ def : Pat<(v4f64 (X86VBroadcast (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (VBROADCASTSDYrm addr:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
// halves of a 256-bit vector.
@@ -7861,6 +7870,15 @@ let Predicates = [HasAVX2, NoVLX] in {
(VPBROADCASTQrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
(VPBROADCASTQYrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (VPBROADCASTDrm addr:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (VPBROADCASTDYrm addr:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VPBROADCASTQrm addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VPBROADCASTQYrm addr:$src)>;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=315382&r1=315381&r2=315382&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Tue Oct 10 15:40:31 2017
@@ -273,8 +273,7 @@ define <16 x i16> @broadcast_mem_v4i16_v
;
; X64-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
; X64-AVX2: ## BB#0:
-; X64-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
+; X64-AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
; X64-AVX2-NEXT: retq
;
; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16:
More information about the llvm-commits mailing list