[llvm] r278317 - [AVX-512] Add patterns to allow EVEX encoded stores of v16i16/v8i16/v16i8/v32i8 even when BWI is not supported.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 10 23:04:04 PDT 2016
Author: ctopper
Date: Thu Aug 11 01:04:04 2016
New Revision: 278317
URL: http://llvm.org/viewvc/llvm-project?rev=278317&view=rev
Log:
[AVX-512] Add patterns to allow EVEX encoded stores of v16i16/v8i16/v16i8/v32i8 even when BWI is not supported.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=278317&r1=278316&r2=278317&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Aug 11 01:04:04 2016
@@ -2793,6 +2793,28 @@ def : Pat<(v16i32 (vselect (xor VK16:$ma
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
+let Predicates = [HasVLX, NoBWI] in {
+ // 128-bit load/store without BWI.
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVDQA32Z128mr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVDQA32Z128mr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVDQU32Z128mr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVDQU32Z128mr addr:$dst, VR128:$src)>;
+
+ // 256-bit load/store without BWI.
+ def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
+ (VMOVDQA32Z256mr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
+ (VMOVDQA32Z256mr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v16i16 VR256:$src), addr:$dst),
+ (VMOVDQU32Z256mr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v32i8 VR256:$src), addr:$dst),
+ (VMOVDQU32Z256mr addr:$dst, VR256:$src)>;
+}
+
let Predicates = [HasVLX] in {
// Special patterns for storing subvector extracts of lower 128-bits of 256.
// Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=278317&r1=278316&r2=278317&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Aug 11 01:04:04 2016
@@ -1028,7 +1028,7 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
}
-let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+let Predicates = [HasAVX, NoVLX] in {
// 128-bit load/store
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
(VMOVAPSmr addr:$dst, VR128:$src)>;
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=278317&r1=278316&r2=278317&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Thu Aug 11 01:04:04 2016
@@ -4151,7 +4151,7 @@ define void @store_cvt_4f32_to_8i16_zero
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX512VL-NEXT: vmovdqa %xmm0, (%r14)
+; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14)
; AVX512VL-NEXT: addq $16, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
@@ -5944,7 +5944,7 @@ define void @store_cvt_4f64_to_8i16_zero
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX512VL-NEXT: vmovdqa %xmm0, (%r14)
+; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14)
; AVX512VL-NEXT: addq $32, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
More information about the llvm-commits
mailing list