[llvm] r211176 - [X86] AVX512: Add non-temporal stores
Adam Nemet
anemet at apple.com
Wed Jun 18 09:51:10 PDT 2014
Author: anemet
Date: Wed Jun 18 11:51:10 2014
New Revision: 211176
URL: http://llvm.org/viewvc/llvm-project?rev=211176&view=rev
Log:
[X86] AVX512: Add non-temporal stores
Note that I followed the AVX2 convention here and didn't add LLVM intrinsics
for stores. These can be generated with the nontemporal hint on LLVM IR
stores (see new test). The GCC builtins are lowered directly into nontemporal
stores.
<rdar://problem/17082571>
Added:
llvm/trunk/test/CodeGen/X86/avx512-nontemporal.ll
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/MC/X86/avx512-encodings.s
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=211176&r1=211175&r2=211176&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Jun 18 11:51:10 2014
@@ -1800,6 +1800,35 @@ def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrc
(int_x86_avx512_movntdqa addr:$src))]>,
EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+// Prefer non-temporal over temporal versions
+let AddedComplexity = 400, SchedRW = [WriteStore] in {
+
+def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
+ (ins f512mem:$dst, VR512:$src),
+ "vmovntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v16f32 VR512:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
+ (ins f512mem:$dst, VR512:$src),
+ "vmovntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f64 VR512:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+
+def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
+ (ins i512mem:$dst, VR512:$src),
+ "vmovntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8i64 VR512:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>,
+ EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+}
+
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
Added: llvm/trunk/test/CodeGen/X86/avx512-nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-nontemporal.ll?rev=211176&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-nontemporal.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512-nontemporal.ll Wed Jun 18 11:51:10 2014
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s
+
+define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) {
+; CHECK: vmovntps %z
+ %cast = bitcast i8* %B to <16 x float>*
+ %A2 = fadd <16 x float> %A, %AA
+ store <16 x float> %A2, <16 x float>* %cast, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+ %cast1 = bitcast i8* %B to <8 x i64>*
+ %E2 = add <8 x i64> %E, %EE
+ store <8 x i64> %E2, <8 x i64>* %cast1, align 64, !nontemporal !0
+; CHECK: vmovntpd %z
+ %cast2 = bitcast i8* %B to <8 x double>*
+ %C2 = fadd <8 x double> %C, %CC
+ store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0
+ ret void
+}
+
+!0 = metadata !{i32 1}
Modified: llvm/trunk/test/MC/X86/avx512-encodings.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/avx512-encodings.s?rev=211176&r1=211175&r2=211176&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/avx512-encodings.s (original)
+++ llvm/trunk/test/MC/X86/avx512-encodings.s Wed Jun 18 11:51:10 2014
@@ -3163,3 +3163,15 @@ vmovntdqa (%r14,%rdx,2), %zmm18
// CHECK: vmovntdqa
// CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x7c,0x14,0x02]
vmovntdqa 128(%r12,%rdx), %zmm23
+
+// CHECK: vmovntdq
+// CHECK: encoding: [0x62,0x21,0x7d,0x48,0xe7,0x24,0xa9]
+vmovntdq %zmm28, (%rcx,%r13,4)
+
+// CHECK: vmovntpd
+// CHECK: encoding: [0x62,0xf1,0xfd,0x48,0x2b,0xb2,0x04,0x00,0x00,0x00]
+vmovntpd %zmm6, 4(%rdx)
+
+// CHECK: vmovntps
+// CHECK: encoding: [0x62,0x51,0x7c,0x48,0x2b,0x5c,0x8d,0x00]
+vmovntps %zmm11, (%r13,%rcx,4)
More information about the llvm-commits
mailing list