[llvm] d6c72bd - [X86][XOP] Add XOP target vselect-pcmp tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 6 06:01:12 PDT 2020


Author: Simon Pilgrim
Date: 2020-07-06T13:58:26+01:00
New Revision: d6c72bdca2f20e724a755186e5c578b70b96b192

URL: https://github.com/llvm/llvm-project/commit/d6c72bdca2f20e724a755186e5c578b70b96b192
DIFF: https://github.com/llvm/llvm-project/commit/d6c72bdca2f20e724a755186e5c578b70b96b192.diff

LOG: [X86][XOP] Add XOP target vselect-pcmp tests

Noticed in D83181 that XOP can probably do a lot more than other targets, thanks to its vector shift and vpcmov instructions.
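
For reference, a minimal sketch of the sign-bit select pattern these tests
exercise (this mirrors the first test in the file; the function name and
element type here are illustrative, not part of the commit):

define <4 x i32> @signbit_sel_example(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
  ; Select lanes of %x where the corresponding mask element is negative,
  ; i.e. where its sign bit is set; this is exactly what the BLENDV*
  ; family consumes, so the explicit compare should be removable.
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %z
}

On XOP, the wider integer cases can instead lower to vpcmov, which performs a
full bitwise select (roughly (x & mask) | (y & ~mask) per bit), so the mask is
first sign-extended across each lane (e.g. via vpcomltw or vpshaq) rather than
only its sign bit being consulted.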

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/vselect-pcmp.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll
index bc6dc30a9658..c393955e2088 100644
--- a/llvm/test/CodeGen/X86/vselect-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx       | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2      | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f   | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefix=AVX                       --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx       | FileCheck %s --check-prefixes=CHECK,AVX,AVX12F,AVX12,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2      | FileCheck %s --check-prefixes=CHECK,AVX,AVX12F,AVX12,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f   | FileCheck %s --check-prefixes=CHECK,AVX,AVX12F,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop       | FileCheck %s --check-prefixes=CHECK,XOP
 
 ; The condition vector for BLENDV* only cares about the sign bit of each element.
 ; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.
@@ -10,10 +11,10 @@
 ; Test 128-bit vectors for all legal element types.
 
 define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
-; AVX-LABEL: signbit_sel_v16i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    retq
+; CHECK-LABEL: signbit_sel_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %tr = icmp slt <16 x i8> %mask, zeroinitializer
   %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
   ret <16 x i8> %z
@@ -28,6 +29,13 @@ define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask)
 ; AVX-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
 ; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v8i16:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT:    vpcomltw %xmm3, %xmm2, %xmm2
+; XOP-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %tr = icmp slt <8 x i16> %mask, zeroinitializer
   %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
   ret <8 x i16> %z
@@ -57,6 +65,11 @@ define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask)
 ; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
 ; AVX512VL-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v4i32:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %tr = icmp slt <4 x i32> %mask, zeroinitializer
   %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
   ret <4 x i32> %z
@@ -86,6 +99,11 @@ define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask)
 ; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
 ; AVX512VL-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v2i64:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %tr = icmp slt <2 x i64> %mask, zeroinitializer
   %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
   ret <2 x i64> %z
@@ -115,6 +133,11 @@ define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32>
 ; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
 ; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v4f32:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %tr = icmp slt <4 x i32> %mask, zeroinitializer
   %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
   ret <4 x float> %z
@@ -144,6 +167,11 @@ define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i6
 ; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
 ; AVX512VL-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v2f64:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %tr = icmp slt <2 x i64> %mask, zeroinitializer
   %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
   ret <2 x double> %z
@@ -173,6 +201,16 @@ define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask)
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX512-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v32i8:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT:    vpcomltb %xmm4, %xmm3, %xmm3
+; XOP-NEXT:    vpcomltb %xmm4, %xmm2, %xmm2
+; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; XOP-NEXT:    retq
   %tr = icmp slt <32 x i8> %mask, zeroinitializer
   %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
   ret <32 x i8> %z
@@ -206,6 +244,16 @@ define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
 ; AVX512-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
 ; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX512-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v16i16:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT:    vpcomltw %xmm4, %xmm3, %xmm3
+; XOP-NEXT:    vpcomltw %xmm4, %xmm2, %xmm2
+; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; XOP-NEXT:    retq
   %tr = icmp slt <16 x i16> %mask, zeroinitializer
   %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
   ret <16 x i16> %z
@@ -234,6 +282,11 @@ define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask)
 ; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
 ; AVX512VL-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v8i32:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
+; XOP-NEXT:    retq
   %tr = icmp slt <8 x i32> %mask, zeroinitializer
   %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
   ret <8 x i32> %z
@@ -262,6 +315,11 @@ define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask)
 ; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
 ; AVX512VL-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v4i64:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; XOP-NEXT:    retq
   %tr = icmp slt <4 x i64> %mask, zeroinitializer
   %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
   ret <4 x i64> %z
@@ -290,6 +348,11 @@ define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i6
 ; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
 ; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v4f64:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; XOP-NEXT:    retq
   %tr = icmp slt <4 x i64> %mask, zeroinitializer
   %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
   ret <4 x double> %z
@@ -330,6 +393,15 @@ define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double>
 ; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
 ; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v4f64_small_mask:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpmovsxdq %xmm2, %xmm3
+; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; XOP-NEXT:    vpmovsxdq %xmm2, %xmm2
+; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; XOP-NEXT:    retq
   %tr = icmp slt <4 x i32> %mask, zeroinitializer
   %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
   ret <4 x double> %z
@@ -350,6 +422,12 @@ define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i6
 ; AVX512-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
 ; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v8f64:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
+; XOP-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
+; XOP-NEXT:    retq
   %tr = icmp slt <8 x i64> %mask, zeroinitializer
   %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
   ret <8 x double> %z
@@ -384,6 +462,13 @@ define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x
 ; AVX512VL-NEXT:    vcmpltps %xmm2, %xmm0, %k1
 ; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: signbit_sel_v4f32_fcmp:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; XOP-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
+; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %cmp = fcmp olt <4 x float> %x, zeroinitializer
   %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
   ret <4 x float> %sel
@@ -420,6 +505,18 @@ define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x
 ; AVX512VL-NEXT:    vptestnmq {{.*}}(%rip){1to4}, %ymm0, %k1
 ; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: blend_splat1_mask_cond_v4i64:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT:    vpsllq $63, %xmm3, %xmm3
+; XOP-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
+; XOP-NEXT:    vpshaq %xmm4, %xmm3, %xmm3
+; XOP-NEXT:    vpsllq $63, %xmm0, %xmm0
+; XOP-NEXT:    vpshaq %xmm4, %xmm0, %xmm0
+; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
+; XOP-NEXT:    retq
   %a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
   %c = icmp eq <4 x i64> %a, zeroinitializer
   %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
@@ -449,6 +546,14 @@ define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x
 ; AVX512VL-NEXT:    vptestnmd {{.*}}(%rip){1to4}, %xmm0, %k1
 ; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: blend_splat1_mask_cond_v4i32:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %c = icmp eq <4 x i32> %a, zeroinitializer
   %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
@@ -483,6 +588,17 @@ define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <
 ; AVX512-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
 ; AVX512-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
 ; AVX512-NEXT:    retq
+;
+; XOP-LABEL: blend_splat1_mask_cond_v16i16:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpsllw $15, %xmm0, %xmm3
+; XOP-NEXT:    vpsraw $15, %xmm3, %xmm3
+; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; XOP-NEXT:    vpsllw $15, %xmm0, %xmm0
+; XOP-NEXT:    vpsraw $15, %xmm0, %xmm0
+; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
+; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
+; XOP-NEXT:    retq
   %a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %c = icmp eq <16 x i16> %a, zeroinitializer
   %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
@@ -503,6 +619,14 @@ define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
 ; AVX512-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
 ; AVX512-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
 ; AVX512-NEXT:    retq
+;
+; XOP-LABEL: blend_splat1_mask_cond_v16i8:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT:    vpcomneqb %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %c = icmp eq <16 x i8> %a, zeroinitializer
   %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
@@ -532,6 +656,14 @@ define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2
 ; AVX512VL-NEXT:    vptestnmq {{.*}}(%rip), %xmm0, %k1
 ; AVX512VL-NEXT:    vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: blend_splatmax_mask_cond_v2i64:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT:    vpcomneqq %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808>
   %c = icmp eq <2 x i64> %a, zeroinitializer
   %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
@@ -559,6 +691,11 @@ define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8
 ; AVX512VL-NEXT:    vptestnmd {{.*}}(%rip){1to8}, %ymm0, %k1
 ; AVX512VL-NEXT:    vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: blend_splatmax_mask_cond_v8i32:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
+; XOP-NEXT:    retq
   %a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
   %c = icmp eq <8 x i32> %a, zeroinitializer
   %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
@@ -579,6 +716,14 @@ define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8
 ; AVX512-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
 ; AVX512-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
 ; AVX512-NEXT:    retq
+;
+; XOP-LABEL: blend_splatmax_mask_cond_v8i16:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT:    vpcomneqw %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
   %c = icmp eq <8 x i16> %a, zeroinitializer
   %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
@@ -610,6 +755,16 @@ define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32
 ; AVX512-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
 ; AVX512-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
 ; AVX512-NEXT:    retq
+;
+; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
+; XOP-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
+; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
+; XOP-NEXT:    retq
   %a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
   %c = icmp eq <32 x i8> %a, zeroinitializer
   %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
@@ -647,6 +802,18 @@ define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i
 ; AVX512VL-NEXT:    vptestnmq {{.*}}(%rip){1to4}, %ymm0, %k1
 ; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: blend_splat_mask_cond_v4i64:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT:    vpsllq $62, %xmm3, %xmm3
+; XOP-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
+; XOP-NEXT:    vpshaq %xmm4, %xmm3, %xmm3
+; XOP-NEXT:    vpsllq $62, %xmm0, %xmm0
+; XOP-NEXT:    vpshaq %xmm4, %xmm0, %xmm0
+; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
+; XOP-NEXT:    retq
   %a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
   %c = icmp eq <4 x i64> %a, zeroinitializer
   %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
@@ -676,6 +843,14 @@ define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i
 ; AVX512VL-NEXT:    vptestnmd {{.*}}(%rip){1to4}, %xmm0, %k1
 ; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: blend_splat_mask_cond_v4i32:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536>
   %c = icmp eq <4 x i32> %a, zeroinitializer
   %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
@@ -710,6 +885,17 @@ define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <1
 ; AVX512-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
 ; AVX512-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
 ; AVX512-NEXT:    retq
+;
+; XOP-LABEL: blend_splat_mask_cond_v16i16:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpsllw $5, %xmm0, %xmm3
+; XOP-NEXT:    vpsraw $15, %xmm3, %xmm3
+; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; XOP-NEXT:    vpsllw $5, %xmm0, %xmm0
+; XOP-NEXT:    vpsraw $15, %xmm0, %xmm0
+; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
+; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
+; XOP-NEXT:    retq
   %a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>
   %c = icmp eq <16 x i16> %a, zeroinitializer
   %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
@@ -730,6 +916,14 @@ define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
 ; AVX512-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
 ; AVX512-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
 ; AVX512-NEXT:    retq
+;
+; XOP-LABEL: blend_splat_mask_cond_v16i8:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT:    vpcomneqb %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT:    retq
   %a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
   %c = icmp eq <16 x i8> %a, zeroinitializer
   %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
@@ -772,6 +966,17 @@ define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z
 ; AVX512VL-NEXT:    vptestnmq {{.*}}(%rip), %ymm0, %k1
 ; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: blend_mask_cond_v4i64:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT:    vpcomeqq %xmm4, %xmm3, %xmm3
+; XOP-NEXT:    vpcomeqq %xmm4, %xmm0, %xmm0
+; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; XOP-NEXT:    vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
+; XOP-NEXT:    retq
   %a = and <4 x i64> %x, <i64 2, i64 4, i64 8, i64 16>
   %c = icmp eq <4 x i64> %a, zeroinitializer
   %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
@@ -804,6 +1009,14 @@ define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z
 ; AVX512VL-NEXT:    vptestnmd {{.*}}(%rip), %xmm0, %k1
 ; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: blend_mask_cond_v4i32:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT:    vpcomeqd %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; XOP-NEXT:    retq
   %a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1>
   %c = icmp eq <4 x i32> %a, zeroinitializer
   %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
@@ -839,6 +1052,17 @@ define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i1
 ; AVX512-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
 ; AVX512-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
 ; AVX512-NEXT:    retq
+;
+; XOP-LABEL: blend_mask_cond_v16i16:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT:    vpcomeqw %xmm4, %xmm3, %xmm3
+; XOP-NEXT:    vpcomeqw %xmm4, %xmm0, %xmm0
+; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; XOP-NEXT:    vpcmov %ymm0, %ymm2, %ymm1, %ymm0
+; XOP-NEXT:    retq
   %a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024>
   %c = icmp eq <16 x i16> %a, zeroinitializer
   %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
@@ -853,6 +1077,14 @@ define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z
 ; AVX-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
 ; AVX-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
 ; AVX-NEXT:    retq
+;
+; XOP-LABEL: blend_mask_cond_v16i8:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT:    vpcomeqb %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
+; XOP-NEXT:    retq
   %a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2>
   %c = icmp eq <16 x i8> %a, zeroinitializer
   %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
@@ -892,6 +1124,19 @@ define void @PR46531(i32* %x, i32* %y, i32* %z) {
 ; AVX512VL-NEXT:    vpord %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512VL-NEXT:    vmovdqu %xmm2, (%rdi)
 ; AVX512VL-NEXT:    retq
+;
+; XOP-LABEL: PR46531:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vmovdqu (%rsi), %xmm0
+; XOP-NEXT:    vmovdqu (%rdx), %xmm1
+; XOP-NEXT:    vpor %xmm0, %xmm1, %xmm2
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm3
+; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT:    vpcomneqd %xmm4, %xmm3, %xmm3
+; XOP-NEXT:    vpxor %xmm0, %xmm1, %xmm0
+; XOP-NEXT:    vblendvps %xmm3, %xmm0, %xmm2, %xmm0
+; XOP-NEXT:    vmovups %xmm0, (%rdi)
+; XOP-NEXT:    retq
   %vy = bitcast i32* %y to <4 x i32>*
   %a = load <4 x i32>, <4 x i32>* %vy, align 4
   %vz = bitcast i32* %z to <4 x i32>*
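
As the header note says, the CHECK lines above were autogenerated by
utils/update_llc_test_checks.py. A typical regeneration command looks like
this (the build path is illustrative and assumes a locally built llc):

    llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
        llvm/test/CodeGen/X86/vselect-pcmp.ll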
