[llvm] r287975 - [AVX-512] Add unmasked EVEX vpmovzx/sx instructions to load folding tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 26 00:21:53 PST 2016
Author: ctopper
Date: Sat Nov 26 02:21:52 2016
New Revision: 287975
URL: http://llvm.org/viewvc/llvm-project?rev=287975&view=rev
Log:
[AVX-512] Add unmasked EVEX vpmovzx/sx instructions to load folding tables.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx1.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=287975&r1=287974&r2=287975&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Nov 26 02:21:52 2016
@@ -879,6 +879,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
{ X86::VPABSDZrr, X86::VPABSDZrm, 0 },
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
+ { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 },
+ { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, 0 },
+ { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 },
+ { X86::VPMOVSXDQZrr, X86::VPMOVSXDQZrm, 0 },
+ { X86::VPMOVSXWDZrr, X86::VPMOVSXWDZrm, 0 },
+ { X86::VPMOVSXWQZrr, X86::VPMOVSXWQZrm, 0 },
+ { X86::VPMOVZXBDZrr, X86::VPMOVZXBDZrm, 0 },
+ { X86::VPMOVZXBQZrr, X86::VPMOVZXBQZrm, 0 },
+ { X86::VPMOVZXBWZrr, X86::VPMOVZXBWZrm, 0 },
+ { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 },
+ { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 },
+ { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 },
// AVX-512 foldable instructions (256-bit versions)
{ X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
@@ -895,6 +907,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
{ X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
{ X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
+ { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, 0 },
+ { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, 0 },
+ { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 },
+ { X86::VPMOVSXDQZ256rr, X86::VPMOVSXDQZ256rm, 0 },
+ { X86::VPMOVSXWDZ256rr, X86::VPMOVSXWDZ256rm, 0 },
+ { X86::VPMOVSXWQZ256rr, X86::VPMOVSXWQZ256rm, 0 },
+ { X86::VPMOVZXBDZ256rr, X86::VPMOVZXBDZ256rm, 0 },
+ { X86::VPMOVZXBQZ256rr, X86::VPMOVZXBQZ256rm, 0 },
+ { X86::VPMOVZXBWZ256rr, X86::VPMOVZXBWZ256rm, 0 },
+ { X86::VPMOVZXDQZ256rr, X86::VPMOVZXDQZ256rm, 0 },
+ { X86::VPMOVZXWDZ256rr, X86::VPMOVZXWDZ256rm, 0 },
+ { X86::VPMOVZXWQZ256rr, X86::VPMOVZXWQZ256rm, 0 },
// AVX-512 foldable instructions (128-bit versions)
{ X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
@@ -909,6 +933,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
{ X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
{ X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
+ { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, 0 },
+ { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, 0 },
+ { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, 0 },
+ { X86::VPMOVSXDQZ128rr, X86::VPMOVSXDQZ128rm, 0 },
+ { X86::VPMOVSXWDZ128rr, X86::VPMOVSXWDZ128rm, 0 },
+ { X86::VPMOVSXWQZ128rr, X86::VPMOVSXWQZ128rm, 0 },
+ { X86::VPMOVZXBDZ128rr, X86::VPMOVZXBDZ128rm, 0 },
+ { X86::VPMOVZXBQZ128rr, X86::VPMOVZXBQZ128rm, 0 },
+ { X86::VPMOVZXBWZ128rr, X86::VPMOVZXBWZ128rm, 0 },
+ { X86::VPMOVZXDQZ128rr, X86::VPMOVZXDQZ128rm, 0 },
+ { X86::VPMOVZXWDZ128rr, X86::VPMOVZXWDZ128rm, 0 },
+ { X86::VPMOVZXWQZ128rr, X86::VPMOVZXWQZ128rm, 0 },
// F16C foldable instructions
{ X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 },
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx1.ll?rev=287975&r1=287974&r2=287975&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx1.ll Sat Nov 26 02:21:52 2016
@@ -667,114 +667,6 @@ define <8 x i16> @stack_fold_pminuw(<8 x
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
-define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
- ;CHECK-LABEL: stack_fold_pmovsxbd
- ;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = sext <4 x i8> %2 to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
- ;CHECK-LABEL: stack_fold_pmovsxbq
- ;CHECK: vpmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
- %3 = sext <2 x i8> %2 to <2 x i64>
- ret <2 x i64> %3
-}
-
-define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
- ;CHECK-LABEL: stack_fold_pmovsxbw
- ;CHECK: vpmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %3 = sext <8 x i8> %2 to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
- ;CHECK-LABEL: stack_fold_pmovsxdq
- ;CHECK: vpmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- %3 = sext <2 x i32> %2 to <2 x i64>
- ret <2 x i64> %3
-}
-
-define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
- ;CHECK-LABEL: stack_fold_pmovsxwd
- ;CHECK: vpmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = sext <4 x i16> %2 to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
- ;CHECK-LABEL: stack_fold_pmovsxwq
- ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
- %3 = sext <2 x i16> %2 to <2 x i64>
- ret <2 x i64> %3
-}
-
-define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
- ;CHECK-LABEL: stack_fold_pmovzxbd
- ;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 1, i32 19, i32 20, i32 21, i32 2, i32 22, i32 23, i32 24, i32 3, i32 25, i32 26, i32 27>
- %3 = bitcast <16 x i8> %2 to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <2 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
- ;CHECK-LABEL: stack_fold_pmovzxbq
- ;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 1, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
- %3 = bitcast <16 x i8> %2 to <2 x i64>
- ret <2 x i64> %3
-}
-
-define <8 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
- ;CHECK-LABEL: stack_fold_pmovzxbw
- ;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
- %3 = bitcast <16 x i8> %2 to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <2 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
- ;CHECK-LABEL: stack_fold_pmovzxdq
- ;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %3 = bitcast <4 x i32> %2 to <2 x i64>
- ret <2 x i64> %3
-}
-
-define <4 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
- ;CHECK-LABEL: stack_fold_pmovzxwd
- ;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
- %3 = bitcast <8 x i16> %2 to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <2 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
- ;CHECK-LABEL: stack_fold_pmovzxwq
- ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 1, i32 11, i32 12, i32 13>
- %3 = bitcast <8 x i16> %2 to <2 x i64>
- ret <2 x i64> %3
-}
-
define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuldq
;CHECK: vpmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=287975&r1=287974&r2=287975&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Sat Nov 26 02:21:52 2016
@@ -533,3 +533,101 @@ define <64 x i8> @stack_fold_vpermi2b(<6
ret <64 x i8> %res
}
declare <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+
+define <16 x i32> @stack_fold_pmovsxbd_zmm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbd_zmm
+ ;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sext <16 x i8> %a0 to <16 x i32>
+ ret <16 x i32> %2
+}
+
+define <8 x i64> @stack_fold_pmovsxbq_zmm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbq_zmm
+ ;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = sext <8 x i8> %2 to <8 x i64>
+ ret <8 x i64> %3
+}
+
+define <32 x i16> @stack_fold_pmovsxbw_zmm(<32 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbw_zmm
+ ;CHECK: vpmovsxbw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sext <32 x i8> %a0 to <32 x i16>
+ ret <32 x i16> %2
+}
+
+define <8 x i64> @stack_fold_pmovsxdq_zmm(<8 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxdq_zmm
+ ;CHECK: vpmovsxdq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sext <8 x i32> %a0 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define <16 x i32> @stack_fold_pmovsxwd_zmm(<16 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwd_zmm
+ ;CHECK: vpmovsxwd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sext <16 x i16> %a0 to <16 x i32>
+ ret <16 x i32> %2
+}
+
+define <8 x i64> @stack_fold_pmovsxwq_zmm(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwq_zmm
+ ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sext <8 x i16> %a0 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define <16 x i32> @stack_fold_pmovzxbd_zmm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbd_zmm
+ ;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = zext <16 x i8> %a0 to <16 x i32>
+ ret <16 x i32> %2
+}
+
+define <8 x i64> @stack_fold_pmovzxbq_zmm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbq_zmm
+ ;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = zext <8 x i8> %2 to <8 x i64>
+ ret <8 x i64> %3
+}
+
+define <32 x i16> @stack_fold_pmovzxbw_zmm(<32 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbw_zmm
+ ;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = zext <32 x i8> %a0 to <32 x i16>
+ ret <32 x i16> %2
+}
+
+define <8 x i64> @stack_fold_pmovzxdq_zmm(<8 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxdq_zmm
+ ;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = zext <8 x i32> %a0 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define <16 x i32> @stack_fold_pmovzxwd_zmm(<16 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwd_zmm
+ ;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = zext <16 x i16> %a0 to <16 x i32>
+ ret <16 x i32> %2
+}
+
+define <8 x i64> @stack_fold_pmovzxwq_zmm(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwq_zmm
+ ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = zext <8 x i16> %a0 to <8 x i64>
+ ret <8 x i64> %2
+}
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll?rev=287975&r1=287974&r2=287975&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll Sat Nov 26 02:21:52 2016
@@ -698,3 +698,213 @@ define <32 x i8> @stack_fold_vpermi2b_ym
ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbd
+ ;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = sext <4 x i8> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbq
+ ;CHECK: vpmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i8> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbw
+ ;CHECK: vpmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = sext <8 x i8> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxdq
+ ;CHECK: vpmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwd
+ ;CHECK: vpmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = sext <4 x i16> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwq
+ ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbd
+ ;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 1, i32 19, i32 20, i32 21, i32 2, i32 22, i32 23, i32 24, i32 3, i32 25, i32 26, i32 27>
+ %3 = bitcast <16 x i8> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbq
+ ;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 1, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+ %3 = bitcast <16 x i8> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbw
+ ;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %3 = bitcast <16 x i8> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <2 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxdq
+ ;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwd
+ ;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %3 = bitcast <8 x i16> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwq
+ ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 1, i32 11, i32 12, i32 13>
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <8 x i32> @stack_fold_pmovsxbd_ymm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbd_ymm
+ ;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = sext <8 x i8> %2 to <8 x i32>
+ ret <8 x i32> %3
+}
+
+define <4 x i64> @stack_fold_pmovsxbq_ymm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbq_ymm
+ ;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = sext <4 x i8> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <16 x i16> @stack_fold_pmovsxbw_ymm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbw_ymm
+ ;CHECK: vpmovsxbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sext <16 x i8> %a0 to <16 x i16>
+ ret <16 x i16> %2
+}
+
+define <4 x i64> @stack_fold_pmovsxdq_ymm(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxdq_ymm
+ ;CHECK: vpmovsxdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sext <4 x i32> %a0 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define <8 x i32> @stack_fold_pmovsxwd_ymm(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwd_ymm
+ ;CHECK: vpmovsxwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sext <8 x i16> %a0 to <8 x i32>
+ ret <8 x i32> %2
+}
+
+define <4 x i64> @stack_fold_pmovsxwq_ymm(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwq_ymm
+ ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = sext <4 x i16> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <8 x i32> @stack_fold_pmovzxbd_ymm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbd_ymm
+ ;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = zext <8 x i8> %2 to <8 x i32>
+ ret <8 x i32> %3
+}
+
+define <4 x i64> @stack_fold_pmovzxbq_ymm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbq_ymm
+ ;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = zext <4 x i8> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <16 x i16> @stack_fold_pmovzxbw_ymm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbw_ymm
+ ;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = zext <16 x i8> %a0 to <16 x i16>
+ ret <16 x i16> %2
+}
+
+define <4 x i64> @stack_fold_pmovzxdq_ymm(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxdq_ymm
+ ;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = zext <4 x i32> %a0 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define <8 x i32> @stack_fold_pmovzxwd_ymm(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwd_ymm
+ ;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = zext <8 x i16> %a0 to <8 x i32>
+ ret <8 x i32> %2
+}
+
+define <4 x i64> @stack_fold_pmovzxwq_ymm(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwq_ymm
+ ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = zext <4 x i16> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
More information about the llvm-commits
mailing list