[llvm] r277305 - [AVX512] Always use EVEX encodings for 128/256-bit move instructions in getLoadStoreRegOpcode if VLX is supported.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 31 13:20:06 PDT 2016
Author: ctopper
Date: Sun Jul 31 15:20:05 2016
New Revision: 277305
URL: http://llvm.org/viewvc/llvm-project?rev=277305&view=rev
Log:
[AVX512] Always use EVEX encodings for 128/256-bit move instructions in getLoadStoreRegOpcode if VLX is supported.
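In effect, the 16- and 32-byte cases in getLoadStoreRegOpcode no longer special-case the plain VR128/VR256 register classes: when the subtarget has VLX, the EVEX-encoded Z128/Z256 moves are used for every spill and reload, since unlike the VEX forms they can also address the extended registers xmm16-xmm31 (which is what the removed "Using extended register requires VLX" asserts were guarding). The regenerated CHECK lines below reflect this: spills and reloads that previously printed as vmovdqa/vmovdqu now print as the EVEX forms vmovdqa64/vmovdqu64.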
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=277305&r1=277304&r2=277305&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Jul 31 15:20:05 2016
@@ -4840,6 +4840,7 @@ static unsigned getLoadStoreRegOpcode(un
bool load) {
bool HasAVX = STI.hasAVX();
bool HasAVX512 = STI.hasAVX512();
+ bool HasVLX = STI.hasVLX();
if (HasAVX512 && isMaskRegClass(RC))
return getLoadStoreMaskRegOpcode(RC, load);
@@ -4884,38 +4885,28 @@ static unsigned getLoadStoreRegOpcode(un
assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
case 16: {
- assert((X86::VR128RegClass.hasSubClassEq(RC) ||
- X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass");
+ assert(X86::VR128XRegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
// If stack is realigned we can use aligned stores.
- if (X86::VR128RegClass.hasSubClassEq(RC)) {
- if (isStackAligned)
- return load ? (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
- : (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
- else
- return load ? (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
- : (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
- }
- assert(STI.hasVLX() && "Using extended register requires VLX");
if (isStackAligned)
- return load ? X86::VMOVAPSZ128rm : X86::VMOVAPSZ128mr;
+ return load ?
+ (HasVLX ? X86::VMOVAPSZ128rm : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm):
+ (HasVLX ? X86::VMOVAPSZ128mr : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
else
- return load ? X86::VMOVUPSZ128rm : X86::VMOVUPSZ128mr;
+ return load ?
+ (HasVLX ? X86::VMOVUPSZ128rm : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm):
+ (HasVLX ? X86::VMOVUPSZ128mr : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
}
case 32:
- assert((X86::VR256RegClass.hasSubClassEq(RC) ||
- X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass");
+ assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
// If stack is realigned we can use aligned stores.
- if (X86::VR256RegClass.hasSubClassEq(RC)) {
- if (isStackAligned)
- return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
- else
- return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
- }
- assert(STI.hasVLX() && "Using extended register requires VLX");
if (isStackAligned)
- return load ? X86::VMOVAPSZ256rm : X86::VMOVAPSZ256mr;
+ return load ?
+ (HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm) :
+ (HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr);
else
- return load ? X86::VMOVUPSZ256rm : X86::VMOVUPSZ256mr;
+ return load ?
+ (HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm) :
+ (HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr);
case 64:
assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
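The new selection logic is easier to read with the nested ternaries pulled out into a standalone helper. This is only an illustrative sketch (the enum and function name below are stand-ins, not LLVM API); the selection order matches the 16-byte case in the diff:

  // Stand-ins for the X86:: opcode enumerators used in the diff.
  enum Opcode {
    MOVAPSrm, VMOVAPSrm, VMOVAPSZ128rm,  // aligned loads: SSE / VEX / EVEX
    MOVUPSrm, VMOVUPSrm, VMOVUPSZ128rm   // unaligned loads: SSE / VEX / EVEX
  };

  // 16-byte load selection after this commit: prefer the EVEX-encoded
  // Z128 form whenever VLX is available (it can also address the extended
  // registers xmm16-xmm31), otherwise fall back to VEX, then legacy SSE.
  Opcode select16ByteLoad(bool HasVLX, bool HasAVX, bool isStackAligned) {
    if (isStackAligned)
      return HasVLX ? VMOVAPSZ128rm : HasAVX ? VMOVAPSrm : MOVAPSrm;
    return HasVLX ? VMOVUPSZ128rm : HasAVX ? VMOVUPSrm : MOVUPSrm;
  }

The 32-byte case is analogous except that there is no legacy SSE fallback (256-bit vectors require at least AVX), so the choice reduces to HasVLX ? Z256 form : Y form.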
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll?rev=277305&r1=277304&r2=277305&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll Sun Jul 31 15:20:05 2016
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vl -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s
declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=277305&r1=277304&r2=277305&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Sun Jul 31 15:20:05 2016
@@ -137,16 +137,16 @@ define <4 x float> @cvt_4i16_to_4f32(<4
; AVX512VL-NEXT: shrq $48, %rdi
; AVX512VL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bp, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %r14d, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
@@ -258,16 +258,16 @@ define <4 x float> @cvt_8i16_to_4f32(<8
; AVX512VL-NEXT: shrq $48, %rdi
; AVX512VL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bp, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %r14d, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
@@ -458,34 +458,34 @@ define <8 x float> @cvt_8i16_to_8f32(<8
; AVX512VL-NEXT: shrq $48, %rdi
; AVX512VL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %r14w, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %ebp, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %r13d, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %r12w, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %r15w, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl {{[0-9]+}}(%rsp), %edi # 4-byte Reload
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX512VL-NEXT: vinsertps $48, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1,2],mem[0]
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps $16, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
@@ -840,64 +840,64 @@ define <16 x float> @cvt_16i16_to_16f32(
; AVX512VL-NEXT: shrq $48, %rdi
; AVX512VL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %r12w, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bp, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %r14d, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %ebx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %r13w, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %r15w, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl {{[0-9]+}}(%rsp), %edi # 4-byte Reload
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
; AVX512VL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX512VL-NEXT: movzwl %ax, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX512VL-NEXT: movzwl %ax, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl {{[0-9]+}}(%rsp), %edi # 4-byte Reload
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
; AVX512VL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX512VL-NEXT: movzwl %ax, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX512VL-NEXT: movzwl %ax, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl {{[0-9]+}}(%rsp), %edi # 4-byte Reload
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX512VL-NEXT: vinsertps $48, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1,2],mem[0]
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps $16, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
@@ -905,14 +905,14 @@ define <16 x float> @cvt_16i16_to_16f32(
; AVX512VL-NEXT: vinsertps $48, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0,1,2],mem[0]
; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps $16, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0,1],mem[0],xmm1[3]
; AVX512VL-NEXT: vinsertps $48, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0,1,2],mem[0]
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
; AVX512VL-NEXT: vinsertps $16, {{[0-9]+}}(%rsp), %xmm2, %xmm2 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm2, %xmm2 # 16-byte Folded Reload
@@ -1035,16 +1035,16 @@ define <4 x float> @load_cvt_4i16_to_4f3
; AVX512VL-NEXT: movq %rdi, %rbx
; AVX512VL-NEXT: movzwl 6(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 4(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl (%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 2(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
@@ -1156,16 +1156,16 @@ define <4 x float> @load_cvt_8i16_to_4f3
; AVX512VL-NEXT: shrq $48, %rdi
; AVX512VL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bp, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %r14d, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
@@ -1296,34 +1296,34 @@ define <8 x float> @load_cvt_8i16_to_8f3
; AVX512VL-NEXT: movq %rdi, %rbx
; AVX512VL-NEXT: movzwl 6(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 4(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl (%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 2(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 14(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 12(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 8(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 10(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX512VL-NEXT: vinsertps $48, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1,2],mem[0]
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps $16, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
@@ -1547,58 +1547,58 @@ define <16 x float> @load_cvt_16i16_to_1
; AVX512VL-NEXT: movq %rdi, %rbx
; AVX512VL-NEXT: movzwl 6(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 4(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl (%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 2(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 14(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 12(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 8(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 10(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 22(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 20(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 16(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 18(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 30(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 28(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 24(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl 26(%rbx), %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX512VL-NEXT: vinsertps $48, {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm0 = xmm0[0,1,2],mem[0]
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps $16, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
@@ -1606,14 +1606,14 @@ define <16 x float> @load_cvt_16i16_to_1
; AVX512VL-NEXT: vinsertps $48, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0,1,2],mem[0]
; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; AVX512VL-NEXT: vinsertps $16, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0,1],mem[0],xmm1[3]
; AVX512VL-NEXT: vinsertps $48, {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm1 = xmm1[0,1,2],mem[0]
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
; AVX512VL-NEXT: vinsertps $16, {{[0-9]+}}(%rsp), %xmm2, %xmm2 # 16-byte Folded Reload
; AVX512VL-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[2,3]
; AVX512VL-NEXT: vinsertps $32, {{[0-9]+}}(%rsp), %xmm2, %xmm2 # 16-byte Folded Reload
@@ -1735,7 +1735,7 @@ define <2 x double> @cvt_2i16_to_2f64(<2
; AVX512VL-NEXT: shrl $16, %ebx
; AVX512VL-NEXT: movzwl %ax, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %ebx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
@@ -1857,13 +1857,13 @@ define <4 x double> @cvt_4i16_to_4f64(<4
; AVX512VL-NEXT: shrl $16, %ebp
; AVX512VL-NEXT: movzwl %ax, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %ebp, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %r14d, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
@@ -1943,7 +1943,7 @@ define <2 x double> @cvt_8i16_to_2f64(<8
; AVX512VL-NEXT: shrl $16, %ebx
; AVX512VL-NEXT: movzwl %ax, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %ebx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
@@ -2064,13 +2064,13 @@ define <4 x double> @cvt_8i16_to_4f64(<8
; AVX512VL-NEXT: shrl $16, %ebp
; AVX512VL-NEXT: movzwl %ax, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %ebp, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %r14d, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
@@ -2284,25 +2284,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8
; AVX512VL-NEXT: shrl $16, %ebp
; AVX512VL-NEXT: movzwl %ax, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %ebp, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %r14w, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %ebx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %r13w, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %r15d, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %r12w, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
; AVX512VL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX512VL-NEXT: callq __gnu_h2f_ieee
@@ -2658,13 +2658,13 @@ define <4 x double> @load_cvt_8i16_to_4f
; AVX512VL-NEXT: shrl $16, %ebp
; AVX512VL-NEXT: movzwl %ax, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %ebp, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movzwl %bx, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
-; AVX512VL-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: movl %r14d, %edi
; AVX512VL-NEXT: callq __gnu_h2f_ieee
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
@@ -2988,7 +2988,7 @@ define <4 x i16> @cvt_4f32_to_4i16(<4 x
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $24, %rsp
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
@@ -2997,12 +2997,12 @@ define <4 x i16> @cvt_4f32_to_4i16(<4 x
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -3100,7 +3100,7 @@ define <8 x i16> @cvt_4f32_to_8i16_undef
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $24, %rsp
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
@@ -3109,12 +3109,12 @@ define <8 x i16> @cvt_4f32_to_8i16_undef
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -3216,7 +3216,7 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $24, %rsp
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
@@ -3225,12 +3225,12 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -3400,7 +3400,7 @@ define <8 x i16> @cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $32, %rsp
-; AVX512VL-NEXT: vmovdqu %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
@@ -3410,12 +3410,12 @@ define <8 x i16> @cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r15d
; AVX512VL-NEXT: orl %ebx, %r15d
-; AVX512VL-NEXT: vmovdqu (%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovdqu64 (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqu (%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovdqu64 (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
@@ -3433,12 +3433,12 @@ define <8 x i16> @cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r15d
; AVX512VL-NEXT: orl %ebx, %r15d
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -3677,12 +3677,12 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
@@ -3720,22 +3720,22 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %r13w
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
-; AVX512VL-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %r14w
-; AVX512VL-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %r15w
@@ -3743,7 +3743,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %r12w
-; AVX512VL-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
@@ -3884,15 +3884,15 @@ define void @store_cvt_4f32_to_4i16(<4 x
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $24, %rsp
; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r14d
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r15d
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %ebp
@@ -4000,7 +4000,7 @@ define void @store_cvt_4f32_to_8i16_unde
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $16, %rsp
; AVX512VL-NEXT: movq %rdi, %r14
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
@@ -4009,12 +4009,12 @@ define void @store_cvt_4f32_to_8i16_unde
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
; AVX512VL-NEXT: shll $16, %ebp
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -4124,7 +4124,7 @@ define void @store_cvt_4f32_to_8i16_zero
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $16, %rsp
; AVX512VL-NEXT: movq %rdi, %r14
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
@@ -4133,12 +4133,12 @@ define void @store_cvt_4f32_to_8i16_zero
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
; AVX512VL-NEXT: shll $16, %ebp
-; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -4282,15 +4282,15 @@ define void @store_cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $72, %rsp
; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: vmovdqu %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovdqu64 %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r12d
@@ -4300,11 +4300,11 @@ define void @store_cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r13d
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %ebp
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r14d
@@ -4568,11 +4568,11 @@ define void @store_cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
@@ -4582,11 +4582,11 @@ define void @store_cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
@@ -4596,11 +4596,11 @@ define void @store_cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r14d
-; AVX512VL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r15d
@@ -4729,7 +4729,7 @@ define <2 x i16> @cvt_2f64_to_2i16(<2 x
; AVX512VL: # BB#0:
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $16, %rsp
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
@@ -4861,7 +4861,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $40, %rsp
-; AVX512VL-NEXT: vmovdqu %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
@@ -5011,7 +5011,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $40, %rsp
-; AVX512VL-NEXT: vmovdqu %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
@@ -5165,7 +5165,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $40, %rsp
-; AVX512VL-NEXT: vmovdqu %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
@@ -5554,7 +5554,7 @@ define void @store_cvt_2f64_to_2i16(<2 x
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $24, %rsp
; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %ebp
@@ -5692,7 +5692,7 @@ define void @store_cvt_4f64_to_4i16(<4 x
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $88, %rsp
; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: vmovdqu %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovdqu64 %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r14d
@@ -5855,7 +5855,7 @@ define void @store_cvt_4f64_to_8i16_unde
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $32, %rsp
; AVX512VL-NEXT: movq %rdi, %r14
-; AVX512VL-NEXT: vmovdqu %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bp
@@ -6026,7 +6026,7 @@ define void @store_cvt_4f64_to_8i16_zero
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $32, %rsp
; AVX512VL-NEXT: movq %rdi, %r14
-; AVX512VL-NEXT: vmovdqu %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bp