[llvm] 1ffc507 - [X86] Add avx512f only command lines to the vector add/sub saturation tests. NFC

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 14 16:50:55 PDT 2020


Author: Craig Topper
Date: 2020-03-14T16:50:44-07:00
New Revision: 1ffc5074050831b19df8834e3abfcb151b304247

URL: https://github.com/llvm/llvm-project/commit/1ffc5074050831b19df8834e3abfcb151b304247
DIFF: https://github.com/llvm/llvm-project/commit/1ffc5074050831b19df8834e3abfcb151b304247.diff

LOG: [X86] Add avx512f only command lines to the vector add/sub saturation tests. NFC

This gives us coverage of splitting v32i16/v64i8 operations when we
have avx512f but not avx512bw.

We are considering making v32i16/v64i8 legal types on avx512f, and
that change needs this test coverage.

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/sadd_sat_vec.ll
    llvm/test/CodeGen/X86/ssub_sat_vec.ll
    llvm/test/CodeGen/X86/uadd_sat_vec.ll
    llvm/test/CodeGen/X86/usub_sat_vec.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index 25e9a09c43d2..2961129dfb17 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -4,7 +4,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
 
 declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX2-NEXT:    vpaddsb %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v64i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpaddsb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v64i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpaddsb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v64i8:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpaddsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
   ret <64 x i8> %z
 }
@@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
 ; AVX2-NEXT:    vpaddsw %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v32i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v32i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpaddsw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v32i16:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
   ret <32 x i16> %z
 }
@@ -551,15 +570,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v16i1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllw $7, %xmm1, %xmm1
-; AVX512-NEXT:    vpmovb2m %xmm1, %k0
-; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k1
-; AVX512-NEXT:    korw %k0, %k1, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm1
+; AVX512BW-NEXT:    vpmovb2m %xmm1, %k0
+; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k1
+; AVX512BW-NEXT:    korw %k0, %k1, %k0
+; AVX512BW-NEXT:    vpmovm2b %k0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
 }
@@ -639,19 +671,30 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k0
-; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %xmm2, %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm1, %xmm0
+; AVX512F-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k0
+; AVX512BW-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
   ret <2 x i32> %z
 }
@@ -729,19 +772,30 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k0
-; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %xmm2, %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm1, %xmm0
+; AVX512F-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k0
+; AVX512BW-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %z
 }
@@ -866,19 +920,30 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %ymm0, %ymm3, %ymm2, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v8i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm2, %k0
-; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm1
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v8i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpaddd %ymm1, %ymm0, %ymm2
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm3 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm4 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %ymm2, %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpxor %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT:    vblendvps %ymm0, %ymm3, %ymm2, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v8i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm2, %k0
+; AVX512BW-NEXT:    vpaddd %ymm1, %ymm0, %ymm1
+; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
   ret <8 x i32> %z
 }
@@ -1221,19 +1286,29 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; AVX2-NEXT:    vblendvpd %xmm0, %xmm3, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm2, %k0
-; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
-; AVX512-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
+; AVX512F-NEXT:    vmovapd {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX512F-NEXT:    vblendvpd %xmm2, {{.*}}(%rip), %xmm3, %xmm3
+; AVX512F-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm1, %xmm0
+; AVX512F-NEXT:    vblendvpd %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm2, %k0
+; AVX512BW-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; AVX512BW-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa64 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
   ret <2 x i64> %z
 }
@@ -1426,19 +1501,30 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; AVX2-NEXT:    vblendvpd %ymm0, %ymm3, %ymm2, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm2, %k0
-; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm1
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm2, %k2
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm1 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpaddq %ymm1, %ymm0, %ymm2
+; AVX512F-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
+; AVX512F-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX512F-NEXT:    vblendvpd %ymm2, %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpxor %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT:    vblendvpd %ymm0, %ymm3, %ymm2, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm2, %k0
+; AVX512BW-NEXT:    vpaddq %ymm1, %ymm0, %ymm1
+; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm2, %k2
+; AVX512BW-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX512BW-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa64 %ymm0, %ymm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
   ret <4 x i64> %z
 }

diff  --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
index c3612a7a538a..784fa0e21ec5 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -4,7 +4,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
 
 declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX2-NEXT:    vpsubsb %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v64i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubsb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v64i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpsubsb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v64i8:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsubsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
   ret <64 x i8> %z
 }
@@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
 ; AVX2-NEXT:    vpsubsw %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v32i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v32i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpsubsw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v32i16:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
   ret <32 x i16> %z
 }
@@ -547,15 +566,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v16i1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k0
-; AVX512-NEXT:    vpsllw $7, %xmm1, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k1
-; AVX512-NEXT:    kandnw %k0, %k1, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k0
+; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k1
+; AVX512BW-NEXT:    kandnw %k0, %k1, %k0
+; AVX512BW-NEXT:    vpmovm2b %k0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
 }
@@ -641,19 +673,32 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %xmm2, %xmm1, %k0
-; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm2
+; AVX512F-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
+; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %xmm1, %xmm2, %xmm3, %xmm2
+; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %xmm2, %xmm1, %k0
+; AVX512BW-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
   ret <2 x i32> %z
 }
@@ -737,19 +782,32 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %xmm2, %xmm1, %k0
-; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm2
+; AVX512F-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
+; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %xmm1, %xmm2, %xmm3, %xmm2
+; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %xmm2, %xmm1, %k0
+; AVX512BW-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %z
 }
@@ -883,19 +941,32 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v8i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %ymm2, %ymm1, %k0
-; AVX512-NEXT:    vpsubd %ymm1, %ymm0, %ymm1
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v8i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpcmpgtd %ymm2, %ymm1, %ymm2
+; AVX512F-NEXT:    vpsubd %ymm1, %ymm0, %ymm1
+; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpxor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm3 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %ymm1, %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v8i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %ymm2, %ymm1, %k0
+; AVX512BW-NEXT:    vpsubd %ymm1, %ymm0, %ymm1
+; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
   ret <8 x i32> %z
 }
@@ -1280,19 +1351,31 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; AVX2-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtq %xmm2, %xmm1, %k0
-; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
-; AVX512-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpcmpgtq %xmm2, %xmm1, %xmm2
+; AVX512F-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
+; AVX512F-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT:    vmovapd {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX512F-NEXT:    vblendvpd %xmm1, {{.*}}(%rip), %xmm2, %xmm2
+; AVX512F-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtq %xmm2, %xmm1, %k0
+; AVX512BW-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; AVX512BW-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa64 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
   ret <2 x i64> %z
 }
@@ -1532,19 +1615,32 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtq %ymm2, %ymm1, %k0
-; AVX512-NEXT:    vpsubq %ymm1, %ymm0, %ymm1
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm2, %k2
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm1 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm2
+; AVX512F-NEXT:    vpsubq %ymm1, %ymm0, %ymm1
+; AVX512F-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpxor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
+; AVX512F-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX512F-NEXT:    vblendvpd %ymm1, %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtq %ymm2, %ymm1, %k0
+; AVX512BW-NEXT:    vpsubq %ymm1, %ymm0, %ymm1
+; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm2, %k2
+; AVX512BW-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX512BW-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa64 %ymm0, %ymm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
   ret <4 x i64> %z
 }

diff  --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
index b398c44b4a02..3a4e59742891 100644
--- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
@@ -4,7 +4,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
 
 declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX2-NEXT:    vpaddusb %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v64i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpaddusb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v64i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpaddusb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v64i8:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpaddusb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
   ret <64 x i8> %z
 }
@@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
 ; AVX2-NEXT:    vpaddusw %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v32i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v32i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpaddusw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v32i16:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
   ret <32 x i16> %z
 }
@@ -524,15 +543,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v16i1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllw $7, %xmm1, %xmm1
-; AVX512-NEXT:    vpmovb2m %xmm1, %k0
-; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k1
-; AVX512-NEXT:    korw %k0, %k1, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm1
+; AVX512BW-NEXT:    vpmovb2m %xmm1, %k0
+; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k1
+; AVX512BW-NEXT:    korw %k0, %k1, %k0
+; AVX512BW-NEXT:    vpmovm2b %k0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
 }
@@ -584,13 +616,23 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
-; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
-; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
-; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm2
+; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm2
+; AVX512F-NEXT:    vpminud %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm2
+; AVX512BW-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
+; AVX512BW-NEXT:    vpminud %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
   ret <2 x i32> %z
 }
@@ -640,13 +682,23 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
-; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
-; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
-; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm2
+; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm2
+; AVX512F-NEXT:    vpminud %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm2
+; AVX512BW-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
+; AVX512BW-NEXT:    vpminud %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %z
 }
@@ -719,13 +771,22 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
 ; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v8i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm2
-; AVX512-NEXT:    vpternlogq $15, %ymm1, %ymm1, %ymm2
-; AVX512-NEXT:    vpminud %ymm2, %ymm0, %ymm0
-; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v8i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm2
+; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm2
+; AVX512F-NEXT:    vpminud %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v8i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm2
+; AVX512BW-NEXT:    vpternlogq $15, %ymm1, %ymm1, %ymm2
+; AVX512BW-NEXT:    vpminud %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
   ret <8 x i32> %z
 }
@@ -926,13 +987,24 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
-; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
-; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
-; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm2
+; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm2
+; AVX512F-NEXT:    vpminuq %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm2
+; AVX512BW-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
+; AVX512BW-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
   ret <2 x i64> %z
 }
@@ -1063,13 +1135,23 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm2
-; AVX512-NEXT:    vpternlogq $15, %ymm1, %ymm1, %ymm2
-; AVX512-NEXT:    vpminuq %ymm2, %ymm0, %ymm0
-; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm2
+; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm2
+; AVX512F-NEXT:    vpminuq %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm2
+; AVX512BW-NEXT:    vpternlogq $15, %ymm1, %ymm1, %ymm2
+; AVX512BW-NEXT:    vpminuq %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
   ret <4 x i64> %z
 }

diff  --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll
index d56f9150a84c..d455a034f0c6 100644
--- a/llvm/test/CodeGen/X86/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll
@@ -4,7 +4,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
 
 declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX2-NEXT:    vpsubusb %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v64i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubusb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v64i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpsubusb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v64i8:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsubusb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
   ret <64 x i8> %z
 }
@@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
 ; AVX2-NEXT:    vpsubusw %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v32i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v32i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpsubusw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v32i16:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
   ret <32 x i16> %z
 }
@@ -524,15 +543,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v16i1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k0
-; AVX512-NEXT:    vpsllw $7, %xmm1, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k1
-; AVX512-NEXT:    kandnw %k0, %k1, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k0
+; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k1
+; AVX512BW-NEXT:    kandnw %k0, %k1, %k0
+; AVX512BW-NEXT:    vpmovm2b %k0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
 }
@@ -866,11 +898,20 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; AVX2-NEXT:    vpand %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
   ret <2 x i64> %z
 }
@@ -998,11 +1039,19 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
   ret <4 x i64> %z
 }


        


More information about the llvm-commits mailing list