[llvm] ca10a6c - [X86] Add test coverage for min/max signbit simplification
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 20 06:20:56 PDT 2023
Author: Simon Pilgrim
Date: 2023-08-20T14:20:49+01:00
New Revision: ca10a6caee412ee28cb18d5a5afd843ae3a7c823
URL: https://github.com/llvm/llvm-project/commit/ca10a6caee412ee28cb18d5a5afd843ae3a7c823
DIFF: https://github.com/llvm/llvm-project/commit/ca10a6caee412ee28cb18d5a5afd843ae3a7c823.diff
LOG: [X86] Add test coverage for min/max signbit simplification
If we're only demanding the signbit from a min/max, then we can simplify it to a logic op.
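
For context, the identities behind the intended fold: smax(x, y) is negative only when both operands are negative, while smin(x, y) is negative when either is; for unsigned values, umax(x, y) has its top bit set when either operand does, and umin(x, y) only when both do. So when nothing but the signbit of the result is demanded, smax/umin reduce to an AND of the operands and smin/umax to an OR. A minimal IR sketch of the signed-max case (illustrative only; this commit adds test coverage, and the combine itself is presumably left to a follow-up):

  ; signbit(smax(x, y)) == signbit(x) & signbit(y), so demanding only the
  ; signbit of the smax:
  %smax = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
  %cmp = icmp sge <16 x i8> %smax, zeroinitializer
  ; is equivalent to the logic-op form:
  %and = and <16 x i8> %x, %y
  %cmp2 = icmp sge <16 x i8> %and, zeroinitializer
  ; (smin pairs with or instead: signbit(smin(x, y)) == signbit(x) | signbit(y))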
Added:
Modified:
llvm/test/CodeGen/X86/combine-smax.ll
llvm/test/CodeGen/X86/combine-smin.ll
llvm/test/CodeGen/X86/combine-umax.ll
llvm/test/CodeGen/X86/combine-umin.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/combine-smax.ll b/llvm/test/CodeGen/X86/combine-smax.ll
index a5b6a54051cde2..39c2f4dcd95db0 100644
--- a/llvm/test/CodeGen/X86/combine-smax.ll
+++ b/llvm/test/CodeGen/X86/combine-smax.ll
@@ -2,10 +2,10 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX2,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512BW
define <16 x i8> @test_v16i8_nosignbit(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test_v16i8_nosignbit:
@@ -87,4 +87,63 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
%2 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer)
ret <16 x i8> %2
}
+
+define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test_v16i8_demandedbits:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm0, %xmm4
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_v16i8_demandedbits:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pmaxsb %xmm1, %xmm0
+; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: test_v16i8_demandedbits:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pmaxsb %xmm1, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm2
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1OR2-LABEL: test_v16i8_demandedbits:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512F-LABEL: test_v16i8_demandedbits:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: test_v16i8_demandedbits:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512BW-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1
+; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+ %smax = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
+ %cmp = icmp sge <16 x i8> %smax, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %res
+}
+
declare <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
diff --git a/llvm/test/CodeGen/X86/combine-smin.ll b/llvm/test/CodeGen/X86/combine-smin.ll
index 6a44c6b911eed5..357e1060fc2b1d 100644
--- a/llvm/test/CodeGen/X86/combine-smin.ll
+++ b/llvm/test/CodeGen/X86/combine-smin.ll
@@ -2,10 +2,10 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX2,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512BW
define <16 x i8> @test_v16i8_nosignbit(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test_v16i8_nosignbit:
@@ -89,4 +89,63 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
%2 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer)
ret <16 x i8> %2
}
+
+define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test_v16i8_demandedbits:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm0, %xmm4
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_v16i8_demandedbits:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pminsb %xmm1, %xmm0
+; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: test_v16i8_demandedbits:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pminsb %xmm1, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm2
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1OR2-LABEL: test_v16i8_demandedbits:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512F-LABEL: test_v16i8_demandedbits:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: test_v16i8_demandedbits:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512BW-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1
+; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+ %smin = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
+ %cmp = icmp sge <16 x i8> %smin, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %res
+}
+
declare <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
diff --git a/llvm/test/CodeGen/X86/combine-umax.ll b/llvm/test/CodeGen/X86/combine-umax.ll
index fbfdb0b5b6279b..52bb9ee7fcb9f5 100644
--- a/llvm/test/CodeGen/X86/combine-umax.ll
+++ b/llvm/test/CodeGen/X86/combine-umax.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512BW
define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test_v8i16_nosignbit:
@@ -43,26 +43,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) {
}
define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
-; SSE2-LABEL: test_v16i8_reassociation:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE2-NEXT: pmaxub %xmm1, %xmm0
-; SSE2-NEXT: pmaxub %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v16i8_reassociation:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE41-NEXT: pmaxub %xmm1, %xmm0
-; SSE41-NEXT: pmaxub %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
-; SSE42-LABEL: test_v16i8_reassociation:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE42-NEXT: pmaxub %xmm1, %xmm0
-; SSE42-NEXT: pmaxub %xmm1, %xmm0
-; SSE42-NEXT: retq
+; SSE-LABEL: test_v16i8_reassociation:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8_reassociation:
; AVX: # %bb.0:
@@ -74,4 +60,60 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
%2 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
ret <16 x i8> %2
}
+
+define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test_v16i8_demandedbits:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pmaxub %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm1
+; SSE2-NEXT: por %xmm3, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_v16i8_demandedbits:
+; SSE41: # %bb.0:
+; SSE41-NEXT: orps %xmm1, %xmm0
+; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: test_v16i8_demandedbits:
+; SSE42: # %bb.0:
+; SSE42-NEXT: orps %xmm1, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm2
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1OR2-LABEL: test_v16i8_demandedbits:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512F-LABEL: test_v16i8_demandedbits:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: test_v16i8_demandedbits:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512BW-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1
+; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+ %umax = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
+ %cmp = icmp sge <16 x i8> %umax, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %res
+}
+
declare <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll
index 0b1115a1994858..5b3b7f942805d9 100644
--- a/llvm/test/CodeGen/X86/combine-umin.ll
+++ b/llvm/test/CodeGen/X86/combine-umin.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512BW
define i8 @test_demandedbits_umin_ult(i8 %a0, i8 %a1) {
; CHECK-LABEL: test_demandedbits_umin_ult:
@@ -60,26 +60,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) {
}
define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
-; SSE2-LABEL: test_v16i8_reassociation:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE2-NEXT: pminub %xmm1, %xmm0
-; SSE2-NEXT: pminub %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v16i8_reassociation:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE41-NEXT: pminub %xmm1, %xmm0
-; SSE41-NEXT: pminub %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
-; SSE42-LABEL: test_v16i8_reassociation:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE42-NEXT: pminub %xmm1, %xmm0
-; SSE42-NEXT: pminub %xmm1, %xmm0
-; SSE42-NEXT: retq
+; SSE-LABEL: test_v16i8_reassociation:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8_reassociation:
; AVX: # %bb.0:
@@ -91,4 +77,60 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
%2 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
ret <16 x i8> %2
}
+
+define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test_v16i8_demandedbits:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pminub %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm1
+; SSE2-NEXT: por %xmm3, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_v16i8_demandedbits:
+; SSE41: # %bb.0:
+; SSE41-NEXT: andps %xmm1, %xmm0
+; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: test_v16i8_demandedbits:
+; SSE42: # %bb.0:
+; SSE42-NEXT: andps %xmm1, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm2
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1OR2-LABEL: test_v16i8_demandedbits:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512F-LABEL: test_v16i8_demandedbits:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: test_v16i8_demandedbits:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1
+; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+ %umin = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
+ %cmp = icmp sge <16 x i8> %umin, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %res
+}
+
declare <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
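
Note that the checks above already show the unsigned cases collapsing to a plain logic op on SSE41/SSE42 and AVX1/AVX2/AVX512F (orps/vpor for umax, andps/vpand for umin), while the signed cases and the SSE2/AVX512BW unsigned paths still emit a real min/max plus compare. A sketch of the unsigned equivalence the remaining paths should reach (illustrative only):

  ; signbit(umax(x, y)) == signbit(x) | signbit(y), so
  %umax = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
  %cmp = icmp sge <16 x i8> %umax, zeroinitializer
  ; is equivalent to:
  %or = or <16 x i8> %x, %y
  %cmp2 = icmp sge <16 x i8> %or, zeroinitializer
  ; (umin pairs with and: signbit(umin(x, y)) == signbit(x) & signbit(y))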