[llvm] r268189 - [AVX512] Prefer AVX512 VPACK instructions over AVX/AVX2 instructions when VLX and BWI are supported.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 30 23:52:19 PDT 2016


Author: ctopper
Date: Sun May  1 01:52:19 2016
New Revision: 268189

URL: http://llvm.org/viewvc/llvm-project?rev=268189&view=rev
Log:
[AVX512] Prefer AVX512 VPACK instructions over AVX/AVX2 instructions when VLX and BWI are supported.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=268189&r1=268188&r2=268189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun May  1 01:52:19 2016
@@ -4451,7 +4451,7 @@ multiclass sse4_pack_y<bits<8> opc, stri
                   Sched<[WriteShuffleLd, ReadAfterLd]>;
 }
 
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
   defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
                              bc_v8i16, loadv2i64, 0>, VEX_4V;
   defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
@@ -4463,7 +4463,7 @@ let Predicates = [HasAVX] in {
                              bc_v4i32, loadv2i64, 0>, VEX_4V;
 }
 
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
   defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
                                bc_v16i16>, VEX_4V, VEX_L;
   defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
@@ -4484,7 +4484,7 @@ let Constraints = "$src1 = $dst" in {
   defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
                             bc_v8i16, memopv2i64>;
 
-  let Predicates = [HasSSE41] in
+  let Predicates = [UseSSE41] in
   defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
                             bc_v4i32, memopv2i64>;
 }

Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=268189&r1=268188&r2=268189&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Sun May  1 01:52:19 2016
@@ -2055,7 +2055,7 @@ declare <16 x i16> @llvm.x86.avx512.mask
 
 define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
   ;CHECK-LABEL: test_mask_packs_epi32_rr_128
-  ;CHECK: vpackssdw       %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1]
+  ;CHECK: vpackssdw       %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0xc1]
   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   ret <8 x i16> %res
 }
@@ -2076,7 +2076,7 @@ define <8 x i16> @test_mask_packs_epi32_
 
 define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
   ;CHECK-LABEL: test_mask_packs_epi32_rm_128
-  ;CHECK: vpackssdw       (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x07]
+  ;CHECK: vpackssdw       (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0x07]
   %b = load <4 x i32>, <4 x i32>* %ptr_b
   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   ret <8 x i16> %res
@@ -2132,7 +2132,7 @@ declare <8 x i16> @llvm.x86.avx512.mask.
 
 define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
   ;CHECK-LABEL: test_mask_packs_epi32_rr_256
-  ;CHECK: vpackssdw       %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1]
+  ;CHECK: vpackssdw       %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0xc1]
   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   ret <16 x i16> %res
 }
@@ -2153,7 +2153,7 @@ define <16 x i16> @test_mask_packs_epi32
 
 define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
   ;CHECK-LABEL: test_mask_packs_epi32_rm_256
-  ;CHECK: vpackssdw       (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x07]
+  ;CHECK: vpackssdw       (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0x07]
   %b = load <8 x i32>, <8 x i32>* %ptr_b
   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   ret <16 x i16> %res
@@ -2209,7 +2209,7 @@ declare <16 x i16> @llvm.x86.avx512.mask
 
 define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   ;CHECK-LABEL: test_mask_packs_epi16_rr_128
-  ;CHECK: vpacksswb       %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1]
+  ;CHECK: vpacksswb       %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x63,0xc1]
   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
   ret <16 x i8> %res
 }
@@ -2230,7 +2230,7 @@ define <16 x i8> @test_mask_packs_epi16_
 
 define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   ;CHECK-LABEL: test_mask_packs_epi16_rm_128
-  ;CHECK: vpacksswb       (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0x07]
+  ;CHECK: vpacksswb       (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x63,0x07]
   %b = load <8 x i16>, <8 x i16>* %ptr_b
   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
   ret <16 x i8> %res
@@ -2256,7 +2256,7 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 
 define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   ;CHECK-LABEL: test_mask_packs_epi16_rr_256
-  ;CHECK: vpacksswb       %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1]
+  ;CHECK: vpacksswb       %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x63,0xc1]
   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
   ret <32 x i8> %res
 }
@@ -2277,7 +2277,7 @@ define <32 x i8> @test_mask_packs_epi16_
 
 define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   ;CHECK-LABEL: test_mask_packs_epi16_rm_256
-  ;CHECK: vpacksswb       (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0x07]
+  ;CHECK: vpacksswb       (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x63,0x07]
   %b = load <16 x i16>, <16 x i16>* %ptr_b
   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
   ret <32 x i8> %res




More information about the llvm-commits mailing list