[llvm] r295154 - [AVX-512] Add PACKSS/PACKUS instructions to load folding tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 14 22:51:40 PST 2017
Author: ctopper
Date: Wed Feb 15 00:51:39 2017
New Revision: 295154
URL: http://llvm.org/viewvc/llvm-project?rev=295154&view=rev
Log:
[AVX-512] Add PACKSS/PACKUS instructions to load folding tables.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=295154&r1=295153&r2=295154&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Feb 15 00:51:39 2017
@@ -1894,6 +1894,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE },
{ X86::VORPDZrr, X86::VORPDZrm, 0 },
{ X86::VORPSZrr, X86::VORPSZrm, 0 },
+ { X86::VPACKSSDWZrr, X86::VPACKSSDWZrm, 0 },
+ { X86::VPACKSSWBZrr, X86::VPACKSSWBZrm, 0 },
+ { X86::VPACKUSDWZrr, X86::VPACKUSDWZrm, 0 },
+ { X86::VPACKUSWBZrr, X86::VPACKUSWBZrm, 0 },
{ X86::VPADDBZrr, X86::VPADDBZrm, 0 },
{ X86::VPADDDZrr, X86::VPADDDZrm, 0 },
{ X86::VPADDQZrr, X86::VPADDQZrm, 0 },
@@ -2068,6 +2072,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VORPDZ256rr, X86::VORPDZ256rm, 0 },
{ X86::VORPSZ128rr, X86::VORPSZ128rm, 0 },
{ X86::VORPSZ256rr, X86::VORPSZ256rm, 0 },
+ { X86::VPACKSSDWZ256rr, X86::VPACKSSDWZ256rm, 0 },
+ { X86::VPACKSSDWZ128rr, X86::VPACKSSDWZ128rm, 0 },
+ { X86::VPACKSSWBZ256rr, X86::VPACKSSWBZ256rm, 0 },
+ { X86::VPACKSSWBZ128rr, X86::VPACKSSWBZ128rm, 0 },
+ { X86::VPACKUSDWZ256rr, X86::VPACKUSDWZ256rm, 0 },
+ { X86::VPACKUSDWZ128rr, X86::VPACKUSDWZ128rm, 0 },
+ { X86::VPACKUSWBZ256rr, X86::VPACKUSWBZ256rm, 0 },
+ { X86::VPACKUSWBZ128rr, X86::VPACKUSWBZ128rm, 0 },
{ X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 },
{ X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 },
{ X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 },
@@ -2562,6 +2574,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VORPDZrrkz, X86::VORPDZrmkz, 0 },
{ X86::VORPSZrrkz, X86::VORPSZrmkz, 0 },
+ { X86::VPACKSSDWZrrkz, X86::VPACKSSDWZrmkz, 0 },
+ { X86::VPACKSSWBZrrkz, X86::VPACKSSWBZrmkz, 0 },
+ { X86::VPACKUSDWZrrkz, X86::VPACKUSDWZrmkz, 0 },
+ { X86::VPACKUSWBZrrkz, X86::VPACKUSWBZrmkz, 0 },
{ X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 },
{ X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 },
{ X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 },
@@ -2687,6 +2703,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
{ X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 },
{ X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 },
+ { X86::VPACKSSDWZ256rrkz, X86::VPACKSSDWZ256rmkz, 0 },
+ { X86::VPACKSSWBZ256rrkz, X86::VPACKSSWBZ256rmkz, 0 },
+ { X86::VPACKUSDWZ256rrkz, X86::VPACKUSDWZ256rmkz, 0 },
+ { X86::VPACKUSWBZ256rrkz, X86::VPACKUSWBZ256rmkz, 0 },
{ X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 },
{ X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 },
{ X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 },
@@ -2806,6 +2826,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
{ X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 },
{ X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 },
+ { X86::VPACKSSDWZ128rrkz, X86::VPACKSSDWZ128rmkz, 0 },
+ { X86::VPACKSSWBZ128rrkz, X86::VPACKSSWBZ128rmkz, 0 },
+ { X86::VPACKUSDWZ128rrkz, X86::VPACKUSDWZ128rmkz, 0 },
+ { X86::VPACKUSWBZ128rrkz, X86::VPACKUSWBZ128rmkz, 0 },
{ X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 },
{ X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 },
{ X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 },
@@ -3069,6 +3093,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE },
{ X86::VORPDZrrk, X86::VORPDZrmk, 0 },
{ X86::VORPSZrrk, X86::VORPSZrmk, 0 },
+ { X86::VPACKSSDWZrrk, X86::VPACKSSDWZrmk, 0 },
+ { X86::VPACKSSWBZrrk, X86::VPACKSSWBZrmk, 0 },
+ { X86::VPACKUSDWZrrk, X86::VPACKUSDWZrmk, 0 },
+ { X86::VPACKUSWBZrrk, X86::VPACKUSWBZrmk, 0 },
{ X86::VPADDBZrrk, X86::VPADDBZrmk, 0 },
{ X86::VPADDDZrrk, X86::VPADDDZrmk, 0 },
{ X86::VPADDQZrrk, X86::VPADDQZrmk, 0 },
@@ -3207,6 +3235,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
{ X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 },
{ X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 },
+ { X86::VPACKSSDWZ256rrk, X86::VPACKSSDWZ256rmk, 0 },
+ { X86::VPACKSSWBZ256rrk, X86::VPACKSSWBZ256rmk, 0 },
+ { X86::VPACKUSDWZ256rrk, X86::VPACKUSDWZ256rmk, 0 },
+ { X86::VPACKUSWBZ256rrk, X86::VPACKUSWBZ256rmk, 0 },
{ X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 },
{ X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 },
{ X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 },
@@ -3340,6 +3372,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
{ X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 },
{ X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 },
+ { X86::VPACKSSDWZ128rrk, X86::VPACKSSDWZ128rmk, 0 },
+ { X86::VPACKSSWBZ128rrk, X86::VPACKSSWBZ128rmk, 0 },
+ { X86::VPACKUSDWZ128rrk, X86::VPACKUSDWZ128rmk, 0 },
+ { X86::VPACKUSWBZ128rrk, X86::VPACKUSWBZ128rmk, 0 },
{ X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 },
{ X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 },
{ X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 },
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=295154&r1=295153&r2=295154&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Wed Feb 15 00:51:39 2017
@@ -278,6 +278,59 @@ define <32 x i16> @stack_fold_pabsw_mask
ret <32 x i16> %2
}
+define <32 x i16> @stack_fold_packssdw(<16 x i32> %a0, <16 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packssdw
+ ;CHECK: vpackssdw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a0, <16 x i32> %a1, <32 x i16> undef, i32 -1)
+ ret <32 x i16> %2
+}
+declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) nounwind readnone
+
+define <64 x i8> @stack_fold_packsswb(<32 x i16> %a0, <32 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packsswb
+ ;CHECK: vpacksswb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a0, <32 x i16> %a1, <64 x i8> undef, i64 -1)
+ ret <64 x i8> %2
+}
+declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) nounwind readnone
+
+define <32 x i16> @stack_fold_packusdw(<16 x i32> %a0, <16 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packusdw
+ ;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a0, <16 x i32> %a1, <32 x i16> undef, i32 -1)
+ ret <32 x i16> %2
+}
+declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) nounwind readnone
+
+define <32 x i16> @stack_fold_packusdw_mask(<32 x i16>* %passthru, <16 x i32> %a0, <16 x i32> %a1, i32 %mask) {
+ ;CHECK-LABEL: stack_fold_packusdw_mask
+ ;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = load <32 x i16>, <32 x i16>* %passthru
+ %3 = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a0, <16 x i32> %a1, <32 x i16> %2, i32 %mask)
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @stack_fold_packusdw_maskz(<16 x i32> %a0, <16 x i32> %a1, i32 %mask) {
+ ;CHECK-LABEL: stack_fold_packusdw_maskz
+ ;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a0, <16 x i32> %a1, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %2
+}
+
+define <64 x i8> @stack_fold_packuswb(<32 x i16> %a0, <32 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packuswb
+ ;CHECK: vpackuswb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a0, <32 x i16> %a1, <64 x i8> undef, i64 -1)
+ ret <64 x i8> %2
+}
+declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) nounwind readnone
+
define <64 x i8> @stack_fold_paddb(<64 x i8> %a0, <64 x i8> %a1) {
;CHECK-LABEL: stack_fold_paddb
;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll?rev=295154&r1=295153&r2=295154&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll Wed Feb 15 00:51:39 2017
@@ -193,6 +193,78 @@ define <16 x i16> @stack_fold_pabsw_ymm(
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
+define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packssdw
+ ;CHECK: vpackssdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_packssdw_ymm(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packssdw_ymm
+ ;CHECK: vpackssdw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i8> @stack_fold_packsswb(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packsswb
+ ;CHECK: vpacksswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <32 x i8> @stack_fold_packsswb_ymm(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packsswb_ymm
+ ;CHECK: vpacksswb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packusdw
+ ;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_packusdw_ymm(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packusdw_ymm
+ ;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i8> @stack_fold_packuswb(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packuswb
+ ;CHECK: vpackuswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <32 x i8> @stack_fold_packuswb_ymm(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packuswb_ymm
+ ;CHECK: vpackuswb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
+
define <16 x i8> @stack_fold_paddb(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_paddb
;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
More information about the llvm-commits
mailing list