[llvm] d6c72bd - [X86][XOP] Add XOP target vselect-pcmp tests
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 6 06:01:12 PDT 2020
Author: Simon Pilgrim
Date: 2020-07-06T13:58:26+01:00
New Revision: d6c72bdca2f20e724a755186e5c578b70b96b192
URL: https://github.com/llvm/llvm-project/commit/d6c72bdca2f20e724a755186e5c578b70b96b192
DIFF: https://github.com/llvm/llvm-project/commit/d6c72bdca2f20e724a755186e5c578b70b96b192.diff
LOG: [X86][XOP] Add XOP target vselect-pcmp tests
Noticed in D83181 that XOP can probably do a lot more than other targets due to its vector shifts and vpcmov instructions.
Added:
Modified:
llvm/test/CodeGen/X86/vselect-pcmp.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll
index bc6dc30a9658..c393955e2088 100644
--- a/llvm/test/CodeGen/X86/vselect-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll
@@ -1,8 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX12F,AVX12,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX12F,AVX12,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX12F,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop | FileCheck %s --check-prefixes=CHECK,XOP
; The condition vector for BLENDV* only cares about the sign bit of each element.
; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.
@@ -10,10 +11,10 @@
; Test 128-bit vectors for all legal element types.
define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
-; AVX-LABEL: signbit_sel_v16i8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; CHECK-LABEL: signbit_sel_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%tr = icmp slt <16 x i8> %mask, zeroinitializer
%z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
ret <16 x i8> %z
@@ -28,6 +29,13 @@ define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask)
; AVX-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v8i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT: vpcomltw %xmm3, %xmm2, %xmm2
+; XOP-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT: retq
%tr = icmp slt <8 x i16> %mask, zeroinitializer
%z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
ret <8 x i16> %z
@@ -57,6 +65,11 @@ define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask)
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT: retq
%tr = icmp slt <4 x i32> %mask, zeroinitializer
%z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %z
@@ -86,6 +99,11 @@ define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask)
; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT: retq
%tr = icmp slt <2 x i64> %mask, zeroinitializer
%z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
ret <2 x i64> %z
@@ -115,6 +133,11 @@ define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32>
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v4f32:
+; XOP: # %bb.0:
+; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT: retq
%tr = icmp slt <4 x i32> %mask, zeroinitializer
%z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
ret <4 x float> %z
@@ -144,6 +167,11 @@ define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i6
; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v2f64:
+; XOP: # %bb.0:
+; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT: retq
%tr = icmp slt <2 x i64> %mask, zeroinitializer
%z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
ret <2 x double> %z
@@ -173,6 +201,16 @@ define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask)
; AVX512: # %bb.0:
; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v32i8:
+; XOP: # %bb.0:
+; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3
+; XOP-NEXT: vpcomltb %xmm4, %xmm2, %xmm2
+; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; XOP-NEXT: retq
%tr = icmp slt <32 x i8> %mask, zeroinitializer
%z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
ret <32 x i8> %z
@@ -206,6 +244,16 @@ define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; AVX512-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v16i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3
+; XOP-NEXT: vpcomltw %xmm4, %xmm2, %xmm2
+; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; XOP-NEXT: retq
%tr = icmp slt <16 x i16> %mask, zeroinitializer
%z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
ret <16 x i16> %z
@@ -234,6 +282,11 @@ define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask)
; AVX512VL-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v8i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
+; XOP-NEXT: retq
%tr = icmp slt <8 x i32> %mask, zeroinitializer
%z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %z
@@ -262,6 +315,11 @@ define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask)
; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v4i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; XOP-NEXT: retq
%tr = icmp slt <4 x i64> %mask, zeroinitializer
%z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
ret <4 x i64> %z
@@ -290,6 +348,11 @@ define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i6
; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v4f64:
+; XOP: # %bb.0:
+; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; XOP-NEXT: retq
%tr = icmp slt <4 x i64> %mask, zeroinitializer
%z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
ret <4 x double> %z
@@ -330,6 +393,15 @@ define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double>
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v4f64_small_mask:
+; XOP: # %bb.0:
+; XOP-NEXT: vpmovsxdq %xmm2, %xmm3
+; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; XOP-NEXT: vpmovsxdq %xmm2, %xmm2
+; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; XOP-NEXT: retq
%tr = icmp slt <4 x i32> %mask, zeroinitializer
%z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
ret <4 x double> %z
@@ -350,6 +422,12 @@ define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i6
; AVX512-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v8f64:
+; XOP: # %bb.0:
+; XOP-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
+; XOP-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
+; XOP-NEXT: retq
%tr = icmp slt <8 x i64> %mask, zeroinitializer
%z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
ret <8 x double> %z
@@ -384,6 +462,13 @@ define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x
; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1
; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: signbit_sel_v4f32_fcmp:
+; XOP: # %bb.0:
+; XOP-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
+; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; XOP-NEXT: retq
%cmp = fcmp olt <4 x float> %x, zeroinitializer
%sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
ret <4 x float> %sel
@@ -420,6 +505,18 @@ define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x
; AVX512VL-NEXT: vptestnmq {{.*}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: blend_splat1_mask_cond_v4i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT: vpsllq $63, %xmm3, %xmm3
+; XOP-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
+; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3
+; XOP-NEXT: vpsllq $63, %xmm0, %xmm0
+; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0
+; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
+; XOP-NEXT: retq
%a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
%c = icmp eq <4 x i64> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
@@ -449,6 +546,14 @@ define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x
; AVX512VL-NEXT: vptestnmd {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: blend_splat1_mask_cond_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0
+; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT: retq
%a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%c = icmp eq <4 x i32> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
@@ -483,6 +588,17 @@ define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <
; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512-NEXT: retq
+;
+; XOP-LABEL: blend_splat1_mask_cond_v16i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vpsllw $15, %xmm0, %xmm3
+; XOP-NEXT: vpsraw $15, %xmm3, %xmm3
+; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOP-NEXT: vpsllw $15, %xmm0, %xmm0
+; XOP-NEXT: vpsraw $15, %xmm0, %xmm0
+; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
+; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
+; XOP-NEXT: retq
%a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%c = icmp eq <16 x i16> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
@@ -503,6 +619,14 @@ define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
; AVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512-NEXT: retq
+;
+; XOP-LABEL: blend_splat1_mask_cond_v16i8:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0
+; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT: retq
%a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%c = icmp eq <16 x i8> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
@@ -532,6 +656,14 @@ define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2
; AVX512VL-NEXT: vptestnmq {{.*}}(%rip), %xmm0, %k1
; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: blend_splatmax_mask_cond_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT: vpcomneqq %xmm3, %xmm0, %xmm0
+; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT: retq
%a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808>
%c = icmp eq <2 x i64> %a, zeroinitializer
%r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
@@ -559,6 +691,11 @@ define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8
; AVX512VL-NEXT: vptestnmd {{.*}}(%rip){1to8}, %ymm0, %k1
; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: blend_splatmax_mask_cond_v8i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
+; XOP-NEXT: retq
%a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
%c = icmp eq <8 x i32> %a, zeroinitializer
%r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
@@ -579,6 +716,14 @@ define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8
; AVX512-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512-NEXT: retq
+;
+; XOP-LABEL: blend_splatmax_mask_cond_v8i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT: vpcomneqw %xmm3, %xmm0, %xmm0
+; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT: retq
%a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
%c = icmp eq <8 x i16> %a, zeroinitializer
%r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
@@ -610,6 +755,16 @@ define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32
; AVX512-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512-NEXT: retq
+;
+; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
+; XOP: # %bb.0:
+; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3
+; XOP-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0
+; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
+; XOP-NEXT: retq
%a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
%c = icmp eq <32 x i8> %a, zeroinitializer
%r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
@@ -647,6 +802,18 @@ define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i
; AVX512VL-NEXT: vptestnmq {{.*}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: blend_splat_mask_cond_v4i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT: vpsllq $62, %xmm3, %xmm3
+; XOP-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
+; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3
+; XOP-NEXT: vpsllq $62, %xmm0, %xmm0
+; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0
+; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
+; XOP-NEXT: retq
%a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
%c = icmp eq <4 x i64> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
@@ -676,6 +843,14 @@ define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i
; AVX512VL-NEXT: vptestnmd {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: blend_splat_mask_cond_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0
+; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT: retq
%a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536>
%c = icmp eq <4 x i32> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
@@ -710,6 +885,17 @@ define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <1
; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512-NEXT: retq
+;
+; XOP-LABEL: blend_splat_mask_cond_v16i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vpsllw $5, %xmm0, %xmm3
+; XOP-NEXT: vpsraw $15, %xmm3, %xmm3
+; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOP-NEXT: vpsllw $5, %xmm0, %xmm0
+; XOP-NEXT: vpsraw $15, %xmm0, %xmm0
+; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
+; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
+; XOP-NEXT: retq
%a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>
%c = icmp eq <16 x i16> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
@@ -730,6 +916,14 @@ define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
; AVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512-NEXT: retq
+;
+; XOP-LABEL: blend_splat_mask_cond_v16i8:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0
+; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
+; XOP-NEXT: retq
%a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%c = icmp eq <16 x i8> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
@@ -772,6 +966,17 @@ define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z
; AVX512VL-NEXT: vptestnmq {{.*}}(%rip), %ymm0, %k1
; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: blend_mask_cond_v4i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT: vpcomeqq %xmm4, %xmm3, %xmm3
+; XOP-NEXT: vpcomeqq %xmm4, %xmm0, %xmm0
+; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; XOP-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
+; XOP-NEXT: retq
%a = and <4 x i64> %x, <i64 2, i64 4, i64 8, i64 16>
%c = icmp eq <4 x i64> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
@@ -804,6 +1009,14 @@ define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z
; AVX512VL-NEXT: vptestnmd {{.*}}(%rip), %xmm0, %k1
; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: blend_mask_cond_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT: vpcomeqd %xmm3, %xmm0, %xmm0
+; XOP-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; XOP-NEXT: retq
%a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1>
%c = icmp eq <4 x i32> %a, zeroinitializer
%r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
@@ -839,6 +1052,17 @@ define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i1
; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512-NEXT: retq
+;
+; XOP-LABEL: blend_mask_cond_v16i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT: vpcomeqw %xmm4, %xmm3, %xmm3
+; XOP-NEXT: vpcomeqw %xmm4, %xmm0, %xmm0
+; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; XOP-NEXT: vpcmov %ymm0, %ymm2, %ymm1, %ymm0
+; XOP-NEXT: retq
%a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024>
%c = icmp eq <16 x i16> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
@@ -853,6 +1077,14 @@ define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z
; AVX-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: blend_mask_cond_v16i8:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT: vpcomeqb %xmm3, %xmm0, %xmm0
+; XOP-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
+; XOP-NEXT: retq
%a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2>
%c = icmp eq <16 x i8> %a, zeroinitializer
%r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
@@ -892,6 +1124,19 @@ define void @PR46531(i32* %x, i32* %y, i32* %z) {
; AVX512VL-NEXT: vpord %xmm0, %xmm1, %xmm2 {%k1}
; AVX512VL-NEXT: vmovdqu %xmm2, (%rdi)
; AVX512VL-NEXT: retq
+;
+; XOP-LABEL: PR46531:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovdqu (%rsi), %xmm0
+; XOP-NEXT: vmovdqu (%rdx), %xmm1
+; XOP-NEXT: vpor %xmm0, %xmm1, %xmm2
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT: vpcomneqd %xmm4, %xmm3, %xmm3
+; XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0
+; XOP-NEXT: vblendvps %xmm3, %xmm0, %xmm2, %xmm0
+; XOP-NEXT: vmovups %xmm0, (%rdi)
+; XOP-NEXT: retq
%vy = bitcast i32* %y to <4 x i32>*
%a = load <4 x i32>, <4 x i32>* %vy, align 4
%vz = bitcast i32* %z to <4 x i32>*
More information about the llvm-commits mailing list