[llvm] r268889 - [AVX512] Add non-temporal store patterns for v16i32/v32i16/v64i8.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun May 8 16:43:18 PDT 2016
Author: ctopper
Date: Sun May 8 18:43:17 2016
New Revision: 268889
URL: http://llvm.org/viewvc/llvm-project?rev=268889&view=rev
Log:
[AVX512] Add non-temporal store patterns for v16i32/v32i16/v64i8.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-nontemporal.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=268889&r1=268888&r2=268889&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun May 8 18:43:17 2016
@@ -3214,6 +3214,15 @@ defm VMOVNTDQ : avx512_movnt_vl<0xE7, "v
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
+let Predicates = [HasAVX512], AddedComplexity = 400 in {
+ def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
+ (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+ def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
+ (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+ def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
+ (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+}
+
let Predicates = [HasVLX], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
(VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
Modified: llvm/trunk/test/CodeGen/X86/avx512-nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-nontemporal.ll?rev=268889&r1=268888&r2=268889&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-nontemporal.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-nontemporal.ll Sun May 8 18:43:17 2016
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx512f,+avx512bw | FileCheck %s
-define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) {
+define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, <8 x i64> %E, <8 x i64> %EE, <16 x i32> %F, <16 x i32> %FF, <32 x i16> %G, <32 x i16> %GG, <64 x i8> %H, <64 x i8> %HH) {
; CHECK: vmovntps %z
%cast = bitcast i8* %B to <16 x float>*
%A2 = fadd <16 x float> %A, %AA
@@ -13,6 +13,18 @@ define void @f(<16 x float> %A, <16 x fl
%cast2 = bitcast i8* %B to <8 x double>*
%C2 = fadd <8 x double> %C, %CC
store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+ %cast3 = bitcast i8* %B to <16 x i32>*
+ %F2 = add <16 x i32> %F, %FF
+ store <16 x i32> %F2, <16 x i32>* %cast3, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+ %cast4 = bitcast i8* %B to <32 x i16>*
+ %G2 = add <32 x i16> %G, %GG
+ store <32 x i16> %G2, <32 x i16>* %cast4, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+ %cast5 = bitcast i8* %B to <64 x i8>*
+ %H2 = add <64 x i8> %H, %HH
+ store <64 x i8> %H2, <64 x i8>* %cast5, align 64, !nontemporal !0
ret void
}
More information about the llvm-commits mailing list