[llvm] r268189 - [AVX512] Prefer AVX512 VPACK instructions over AVX/AVX2 instructions when VLX and BWI are supported.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 30 23:52:19 PDT 2016
Author: ctopper
Date: Sun May 1 01:52:19 2016
New Revision: 268189
URL: http://llvm.org/viewvc/llvm-project?rev=268189&view=rev
Log:
[AVX512] Prefer AVX512 VPACK instructions over AVX/AVX2 instructions when VLX and BWI are supported.
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=268189&r1=268188&r2=268189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun May 1 01:52:19 2016
@@ -4451,7 +4451,7 @@ multiclass sse4_pack_y<bits<8> opc, stri
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
bc_v8i16, loadv2i64, 0>, VEX_4V;
defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
@@ -4463,7 +4463,7 @@ let Predicates = [HasAVX] in {
bc_v4i32, loadv2i64, 0>, VEX_4V;
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
bc_v16i16>, VEX_4V, VEX_L;
defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
@@ -4484,7 +4484,7 @@ let Constraints = "$src1 = $dst" in {
defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
bc_v8i16, memopv2i64>;
- let Predicates = [HasSSE41] in
+ let Predicates = [UseSSE41] in
defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
bc_v4i32, memopv2i64>;
}
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=268189&r1=268188&r2=268189&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Sun May 1 01:52:19 2016
@@ -2055,7 +2055,7 @@ declare <16 x i16> @llvm.x86.avx512.mask
define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
;CHECK-LABEL: test_mask_packs_epi32_rr_128
- ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1]
+ ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0xc1]
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}
@@ -2076,7 +2076,7 @@ define <8 x i16> @test_mask_packs_epi32_
define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi32_rm_128
- ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x07]
+ ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
@@ -2132,7 +2132,7 @@ declare <8 x i16> @llvm.x86.avx512.mask.
define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
;CHECK-LABEL: test_mask_packs_epi32_rr_256
- ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1]
+ ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0xc1]
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
@@ -2153,7 +2153,7 @@ define <16 x i16> @test_mask_packs_epi32
define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi32_rm_256
- ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x07]
+ ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
@@ -2209,7 +2209,7 @@ declare <16 x i16> @llvm.x86.avx512.mask
define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
;CHECK-LABEL: test_mask_packs_epi16_rr_128
- ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1]
+ ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x63,0xc1]
%res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
ret <16 x i8> %res
}
@@ -2230,7 +2230,7 @@ define <16 x i8> @test_mask_packs_epi16_
define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi16_rm_128
- ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0x07]
+ ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x63,0x07]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
ret <16 x i8> %res
@@ -2256,7 +2256,7 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
;CHECK-LABEL: test_mask_packs_epi16_rr_256
- ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1]
+ ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x63,0xc1]
%res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
ret <32 x i8> %res
}
@@ -2277,7 +2277,7 @@ define <32 x i8> @test_mask_packs_epi16_
define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi16_rm_256
- ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0x07]
+ ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x63,0x07]
%b = load <16 x i16>, <16 x i16>* %ptr_b
%res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
ret <32 x i8> %res
More information about the llvm-commits
mailing list