[llvm] r215536 - [SKX] Extended non-temporal load/store instructions for AVX512VL subsets.
Robert Khasanov
rob.khasanov at gmail.com
Wed Aug 13 03:46:02 PDT 2014
Author: rkhasanov
Date: Wed Aug 13 05:46:00 2014
New Revision: 215536
URL: http://llvm.org/viewvc/llvm-project?rev=215536&view=rev
Log:
[SKX] Extended non-temporal load/store instructions for AVX512VL subsets.
Added the avx512_movnt_vl multiclass to handle the 256/128-bit forms of the instructions.
Added encoding and lowering tests.
Reviewed by Elena Demikhovsky <elena.demikhovsky at intel.com>
Added:
llvm/trunk/test/CodeGen/X86/avx512vl-nontemporal.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/MC/X86/avx512-encodings.s
llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=215536&r1=215535&r2=215536&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Wed Aug 13 05:46:00 2014
@@ -1954,8 +1954,6 @@ let TargetPrefix = "x86" in { // All in
llvm_i32_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
- def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
//===----------------------------------------------------------------------===//
@@ -3219,6 +3217,8 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
+ def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=215536&r1=215535&r2=215536&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Aug 13 05:46:00 2014
@@ -2090,43 +2090,73 @@ def : Pat<(v8i64 (X86Vinsert undef, GR64
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
+let SchedRW = [WriteLoad] in {
+ def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
+ (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
+ SSEPackedInt>, EVEX, T8PD, EVEX_V512,
+ EVEX_CD8<64, CD8VF>;
+
+ let Predicates = [HasAVX512, HasVLX] in {
+ def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
+ (ins i256mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}", [],
+ SSEPackedInt>, EVEX, T8PD, EVEX_V256,
+ EVEX_CD8<64, CD8VF>;
+
+ def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
+ (ins i128mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}", [],
+ SSEPackedInt>, EVEX, T8PD, EVEX_V128,
+ EVEX_CD8<64, CD8VF>;
+ }
+}
+
+multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
+ ValueType OpVT, RegisterClass RC, X86MemOperand memop,
+ Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
+ let SchedRW = [WriteStore], mayStore = 1,
+ AddedComplexity = 400 in
+ def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
+}
-def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst),
- (ins i512mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR512:$dst,
- (int_x86_avx512_movntdqa addr:$src))]>,
- EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-// Prefer non-temporal over temporal versions
-let AddedComplexity = 400, SchedRW = [WriteStore] in {
-
-def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
- (ins f512mem:$dst, VR512:$src),
- "vmovntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v16f32 VR512:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>,
- EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
- (ins f512mem:$dst, VR512:$src),
- "vmovntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f64 VR512:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>,
- EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-
-def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
- (ins i512mem:$dst, VR512:$src),
- "vmovntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8i64 VR512:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>,
- EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
+ string elty, string elsz, string vsz512,
+ string vsz256, string vsz128, Domain d,
+ Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
+ let Predicates = [prd] in
+ defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
+ !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
+ !cast<X86MemOperand>(elty##"512mem"), d, itin>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
+ !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
+ !cast<X86MemOperand>(elty##"256mem"), d, itin>,
+ EVEX_V256;
+
+ defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
+ !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
+ !cast<X86MemOperand>(elty##"128mem"), d, itin>,
+ EVEX_V128;
+ }
}
+defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
+ "i", "64", "8", "4", "2", SSEPackedInt,
+ HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
+
+defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
+ "f", "64", "8", "4", "2", SSEPackedDouble,
+ HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
+ "f", "32", "16", "8", "4", SSEPackedSingle,
+ HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
+
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=215536&r1=215535&r2=215536&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed Aug 13 05:46:00 2014
@@ -727,6 +727,7 @@ def HasDQI : Predicate<"Subtarget-
def HasBWI : Predicate<"Subtarget->hasBWI()">;
def HasVLX : Predicate<"Subtarget->hasVLX()">,
AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;
+def NoVLX : Predicate<"!Subtarget->hasVLX()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=215536&r1=215535&r2=215536&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Aug 13 05:46:00 2014
@@ -3697,6 +3697,7 @@ let Predicates = [UseSSE1] in {
let AddedComplexity = 400 in { // Prefer non-temporal versions
let SchedRW = [WriteStore] in {
+let Predicates = [HasAVX, NoVLX] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -3737,6 +3738,7 @@ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem,
[(alignednontemporalstore (v4i64 VR256:$src),
addr:$dst)],
IIC_SSE_MOVNT>, VEX, VEX_L;
+}
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
Added: llvm/trunk/test/CodeGen/X86/avx512vl-nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-nontemporal.ll?rev=215536&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-nontemporal.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-nontemporal.ll Wed Aug 13 05:46:00 2014
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s
+
+define void @f256(<8 x float> %A, <8 x float> %AA, i8* %B, <4 x double> %C, <4 x double> %CC, i32 %D, <4 x i64> %E, <4 x i64> %EE) {
+; CHECK: vmovntps %ymm{{.*}} ## encoding: [0x62
+ %cast = bitcast i8* %B to <8 x float>*
+ %A2 = fadd <8 x float> %A, %AA
+ store <8 x float> %A2, <8 x float>* %cast, align 64, !nontemporal !0
+; CHECK: vmovntdq %ymm{{.*}} ## encoding: [0x62
+ %cast1 = bitcast i8* %B to <4 x i64>*
+ %E2 = add <4 x i64> %E, %EE
+ store <4 x i64> %E2, <4 x i64>* %cast1, align 64, !nontemporal !0
+; CHECK: vmovntpd %ymm{{.*}} ## encoding: [0x62
+ %cast2 = bitcast i8* %B to <4 x double>*
+ %C2 = fadd <4 x double> %C, %CC
+ store <4 x double> %C2, <4 x double>* %cast2, align 64, !nontemporal !0
+ ret void
+}
+
+define void @f128(<4 x float> %A, <4 x float> %AA, i8* %B, <2 x double> %C, <2 x double> %CC, i32 %D, <2 x i64> %E, <2 x i64> %EE) {
+; CHECK: vmovntps %xmm{{.*}} ## encoding: [0x62
+ %cast = bitcast i8* %B to <4 x float>*
+ %A2 = fadd <4 x float> %A, %AA
+ store <4 x float> %A2, <4 x float>* %cast, align 64, !nontemporal !0
+; CHECK: vmovntdq %xmm{{.*}} ## encoding: [0x62
+ %cast1 = bitcast i8* %B to <2 x i64>*
+ %E2 = add <2 x i64> %E, %EE
+ store <2 x i64> %E2, <2 x i64>* %cast1, align 64, !nontemporal !0
+; CHECK: vmovntpd %xmm{{.*}} ## encoding: [0x62
+ %cast2 = bitcast i8* %B to <2 x double>*
+ %C2 = fadd <2 x double> %C, %CC
+ store <2 x double> %C2, <2 x double>* %cast2, align 64, !nontemporal !0
+ ret void
+}
+!0 = metadata !{i32 1}
Modified: llvm/trunk/test/MC/X86/avx512-encodings.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/avx512-encodings.s?rev=215536&r1=215535&r2=215536&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/avx512-encodings.s (original)
+++ llvm/trunk/test/MC/X86/avx512-encodings.s Wed Aug 13 05:46:00 2014
@@ -665,6 +665,102 @@
// CHECK: encoding: [0x62,0xf1,0xfe,0x48,0x6f,0xb2,0xc0,0xdf,0xff,0xff]
vmovdqu64 -8256(%rdx), %zmm6
+// CHECK: vmovntdq %zmm24, (%rcx)
+// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x01]
+ vmovntdq %zmm24, (%rcx)
+
+// CHECK: vmovntdq %zmm24, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x21,0x7d,0x48,0xe7,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vmovntdq %zmm24, 291(%rax,%r14,8)
+
+// CHECK: vmovntdq %zmm24, 8128(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x42,0x7f]
+ vmovntdq %zmm24, 8128(%rdx)
+
+// CHECK: vmovntdq %zmm24, 8192(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x82,0x00,0x20,0x00,0x00]
+ vmovntdq %zmm24, 8192(%rdx)
+
+// CHECK: vmovntdq %zmm24, -8192(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x42,0x80]
+ vmovntdq %zmm24, -8192(%rdx)
+
+// CHECK: vmovntdq %zmm24, -8256(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x82,0xc0,0xdf,0xff,0xff]
+ vmovntdq %zmm24, -8256(%rdx)
+
+// CHECK: vmovntdqa (%rcx), %zmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x09]
+ vmovntdqa (%rcx), %zmm17
+
+// CHECK: vmovntdqa 291(%rax,%r14,8), %zmm17
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x2a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovntdqa 291(%rax,%r14,8), %zmm17
+
+// CHECK: vmovntdqa 8128(%rdx), %zmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x4a,0x7f]
+ vmovntdqa 8128(%rdx), %zmm17
+
+// CHECK: vmovntdqa 8192(%rdx), %zmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x8a,0x00,0x20,0x00,0x00]
+ vmovntdqa 8192(%rdx), %zmm17
+
+// CHECK: vmovntdqa -8192(%rdx), %zmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x4a,0x80]
+ vmovntdqa -8192(%rdx), %zmm17
+
+// CHECK: vmovntdqa -8256(%rdx), %zmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x8a,0xc0,0xdf,0xff,0xff]
+ vmovntdqa -8256(%rdx), %zmm17
+
+// CHECK: vmovntpd %zmm17, (%rcx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x09]
+ vmovntpd %zmm17, (%rcx)
+
+// CHECK: vmovntpd %zmm17, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0xfd,0x48,0x2b,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovntpd %zmm17, 291(%rax,%r14,8)
+
+// CHECK: vmovntpd %zmm17, 8128(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x4a,0x7f]
+ vmovntpd %zmm17, 8128(%rdx)
+
+// CHECK: vmovntpd %zmm17, 8192(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x8a,0x00,0x20,0x00,0x00]
+ vmovntpd %zmm17, 8192(%rdx)
+
+// CHECK: vmovntpd %zmm17, -8192(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x4a,0x80]
+ vmovntpd %zmm17, -8192(%rdx)
+
+// CHECK: vmovntpd %zmm17, -8256(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x8a,0xc0,0xdf,0xff,0xff]
+ vmovntpd %zmm17, -8256(%rdx)
+
+// CHECK: vmovntps %zmm5, (%rcx)
+// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x29]
+ vmovntps %zmm5, (%rcx)
+
+// CHECK: vmovntps %zmm5, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xb1,0x7c,0x48,0x2b,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vmovntps %zmm5, 291(%rax,%r14,8)
+
+// CHECK: vmovntps %zmm5, 8128(%rdx)
+// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x6a,0x7f]
+ vmovntps %zmm5, 8128(%rdx)
+
+// CHECK: vmovntps %zmm5, 8192(%rdx)
+// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0xaa,0x00,0x20,0x00,0x00]
+ vmovntps %zmm5, 8192(%rdx)
+
+// CHECK: vmovntps %zmm5, -8192(%rdx)
+// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x6a,0x80]
+ vmovntps %zmm5, -8192(%rdx)
+
+// CHECK: vmovntps %zmm5, -8256(%rdx)
+// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0xaa,0xc0,0xdf,0xff,0xff]
+ vmovntps %zmm5, -8256(%rdx)
+
// CHECK: vmovupd %zmm9, %zmm27
// CHECK: encoding: [0x62,0x41,0xfd,0x48,0x10,0xd9]
vmovupd %zmm9, %zmm27
Modified: llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s?rev=215536&r1=215535&r2=215536&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s Wed Aug 13 05:46:00 2014
@@ -432,6 +432,198 @@
// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x6f,0xaa,0xe0,0xef,0xff,0xff]
vmovdqu64 -4128(%rdx), %ymm29
+// CHECK: vmovntdq %xmm22, (%rcx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x31]
+ vmovntdq %xmm22, (%rcx)
+
+// CHECK: vmovntdq %xmm22, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xe7,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vmovntdq %xmm22, 291(%rax,%r14,8)
+
+// CHECK: vmovntdq %xmm22, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x72,0x7f]
+ vmovntdq %xmm22, 2032(%rdx)
+
+// CHECK: vmovntdq %xmm22, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0xb2,0x00,0x08,0x00,0x00]
+ vmovntdq %xmm22, 2048(%rdx)
+
+// CHECK: vmovntdq %xmm22, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x72,0x80]
+ vmovntdq %xmm22, -2048(%rdx)
+
+// CHECK: vmovntdq %xmm22, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0xb2,0xf0,0xf7,0xff,0xff]
+ vmovntdq %xmm22, -2064(%rdx)
+
+// CHECK: vmovntdq %ymm19, (%rcx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x19]
+ vmovntdq %ymm19, (%rcx)
+
+// CHECK: vmovntdq %ymm19, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0x7d,0x28,0xe7,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vmovntdq %ymm19, 291(%rax,%r14,8)
+
+// CHECK: vmovntdq %ymm19, 4064(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x5a,0x7f]
+ vmovntdq %ymm19, 4064(%rdx)
+
+// CHECK: vmovntdq %ymm19, 4096(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x9a,0x00,0x10,0x00,0x00]
+ vmovntdq %ymm19, 4096(%rdx)
+
+// CHECK: vmovntdq %ymm19, -4096(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x5a,0x80]
+ vmovntdq %ymm19, -4096(%rdx)
+
+// CHECK: vmovntdq %ymm19, -4128(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x9a,0xe0,0xef,0xff,0xff]
+ vmovntdq %ymm19, -4128(%rdx)
+
+// CHECK: vmovntdqa (%rcx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x01]
+ vmovntdqa (%rcx), %xmm24
+
+// CHECK: vmovntdqa 291(%rax,%r14,8), %xmm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x2a,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vmovntdqa 291(%rax,%r14,8), %xmm24
+
+// CHECK: vmovntdqa 2032(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x42,0x7f]
+ vmovntdqa 2032(%rdx), %xmm24
+
+// CHECK: vmovntdqa 2048(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x82,0x00,0x08,0x00,0x00]
+ vmovntdqa 2048(%rdx), %xmm24
+
+// CHECK: vmovntdqa -2048(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x42,0x80]
+ vmovntdqa -2048(%rdx), %xmm24
+
+// CHECK: vmovntdqa -2064(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x82,0xf0,0xf7,0xff,0xff]
+ vmovntdqa -2064(%rdx), %xmm24
+
+// CHECK: vmovntdqa (%rcx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0x21]
+ vmovntdqa (%rcx), %ymm28
+
+// CHECK: vmovntdqa 291(%rax,%r14,8), %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x2a,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vmovntdqa 291(%rax,%r14,8), %ymm28
+
+// CHECK: vmovntdqa 4064(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0x62,0x7f]
+ vmovntdqa 4064(%rdx), %ymm28
+
+// CHECK: vmovntdqa 4096(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0xa2,0x00,0x10,0x00,0x00]
+ vmovntdqa 4096(%rdx), %ymm28
+
+// CHECK: vmovntdqa -4096(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0x62,0x80]
+ vmovntdqa -4096(%rdx), %ymm28
+
+// CHECK: vmovntdqa -4128(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0xa2,0xe0,0xef,0xff,0xff]
+ vmovntdqa -4128(%rdx), %ymm28
+
+// CHECK: vmovntpd %xmm17, (%rcx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x09]
+ vmovntpd %xmm17, (%rcx)
+
+// CHECK: vmovntpd %xmm17, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x2b,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovntpd %xmm17, 291(%rax,%r14,8)
+
+// CHECK: vmovntpd %xmm17, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x4a,0x7f]
+ vmovntpd %xmm17, 2032(%rdx)
+
+// CHECK: vmovntpd %xmm17, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x8a,0x00,0x08,0x00,0x00]
+ vmovntpd %xmm17, 2048(%rdx)
+
+// CHECK: vmovntpd %xmm17, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x4a,0x80]
+ vmovntpd %xmm17, -2048(%rdx)
+
+// CHECK: vmovntpd %xmm17, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x8a,0xf0,0xf7,0xff,0xff]
+ vmovntpd %xmm17, -2064(%rdx)
+
+// CHECK: vmovntpd %ymm27, (%rcx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x19]
+ vmovntpd %ymm27, (%rcx)
+
+// CHECK: vmovntpd %ymm27, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x21,0xfd,0x28,0x2b,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vmovntpd %ymm27, 291(%rax,%r14,8)
+
+// CHECK: vmovntpd %ymm27, 4064(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x5a,0x7f]
+ vmovntpd %ymm27, 4064(%rdx)
+
+// CHECK: vmovntpd %ymm27, 4096(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x9a,0x00,0x10,0x00,0x00]
+ vmovntpd %ymm27, 4096(%rdx)
+
+// CHECK: vmovntpd %ymm27, -4096(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x5a,0x80]
+ vmovntpd %ymm27, -4096(%rdx)
+
+// CHECK: vmovntpd %ymm27, -4128(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x9a,0xe0,0xef,0xff,0xff]
+ vmovntpd %ymm27, -4128(%rdx)
+
+// CHECK: vmovntps %xmm26, (%rcx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x11]
+ vmovntps %xmm26, (%rcx)
+
+// CHECK: vmovntps %xmm26, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x21,0x7c,0x08,0x2b,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vmovntps %xmm26, 291(%rax,%r14,8)
+
+// CHECK: vmovntps %xmm26, 2032(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x52,0x7f]
+ vmovntps %xmm26, 2032(%rdx)
+
+// CHECK: vmovntps %xmm26, 2048(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x92,0x00,0x08,0x00,0x00]
+ vmovntps %xmm26, 2048(%rdx)
+
+// CHECK: vmovntps %xmm26, -2048(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x52,0x80]
+ vmovntps %xmm26, -2048(%rdx)
+
+// CHECK: vmovntps %xmm26, -2064(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x92,0xf0,0xf7,0xff,0xff]
+ vmovntps %xmm26, -2064(%rdx)
+
+// CHECK: vmovntps %ymm28, (%rcx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0x21]
+ vmovntps %ymm28, (%rcx)
+
+// CHECK: vmovntps %ymm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x21,0x7c,0x28,0x2b,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vmovntps %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vmovntps %ymm28, 4064(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0x62,0x7f]
+ vmovntps %ymm28, 4064(%rdx)
+
+// CHECK: vmovntps %ymm28, 4096(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0xa2,0x00,0x10,0x00,0x00]
+ vmovntps %ymm28, 4096(%rdx)
+
+// CHECK: vmovntps %ymm28, -4096(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0x62,0x80]
+ vmovntps %ymm28, -4096(%rdx)
+
+// CHECK: vmovntps %ymm28, -4128(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0xa2,0xe0,0xef,0xff,0xff]
+ vmovntps %ymm28, -4128(%rdx)
+
// CHECK: vmovupd %xmm22, %xmm24
// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x10,0xc6]
vmovupd %xmm22, %xmm24
More information about the llvm-commits mailing list