[llvm] 5ea7b6b - [X86] vselect-avx.ll - add AVX512 test coverage
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 16 03:12:37 PST 2022
Author: Simon Pilgrim
Date: 2022-11-16T11:12:27Z
New Revision: 5ea7b6b78ff6c7d914e0e9279a368966be5f13df
URL: https://github.com/llvm/llvm-project/commit/5ea7b6b78ff6c7d914e0e9279a368966be5f13df
DIFF: https://github.com/llvm/llvm-project/commit/5ea7b6b78ff6c7d914e0e9279a368966be5f13df.diff
LOG: [X86] vselect-avx.ll - add AVX512 test coverage
Ensure we test predicate selects as well
Added:
Modified:
llvm/test/CodeGen/X86/vselect-avx.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index f6f4e220d323b..69dfc01777497 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -63,6 +64,17 @@ define void @test2(ptr %call1559, i64 %indvars.iv4198, <4 x i1> %tmp1895) {
; AVX2-NEXT: vmovupd %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test2:
+; AVX512: ## %bb.0: ## %bb
+; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
+; AVX512-NEXT: movq (%rdi,%rsi,8), %rax
+; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [5.0E-1,5.0E-1,5.0E-1,5.0E-1]
+; AVX512-NEXT: vbroadcastsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 {%k1}
+; AVX512-NEXT: vmovupd %ymm0, (%rax)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
bb:
%arrayidx1928 = getelementptr inbounds ptr, ptr %call1559, i64 %indvars.iv4198
%tmp1888 = load ptr, ptr %arrayidx1928, align 8
@@ -113,6 +125,20 @@ define void @test3(<4 x i32> %induction30, ptr %tmp16, ptr %tmp17, <4 x i16> %t
; AVX2-NEXT: vmovq %xmm0, (%rdi)
; AVX2-NEXT: vmovq %xmm1, (%rsi)
; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test3:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vmovq %xmm0, (%rdi)
+; AVX512-NEXT: vmovq %xmm1, (%rsi)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%tmp6 = srem <4 x i32> %induction30, <i32 3, i32 3, i32 3, i32 3>
%tmp7 = icmp eq <4 x i32> %tmp6, zeroinitializer
%predphi = select <4 x i1> %tmp7, <4 x i16> %tmp3, <4 x i16> %tmp12
@@ -152,6 +178,13 @@ define <32 x i8> @PR22706(<32 x i1> %x) {
; AVX2-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512-LABEL: PR22706:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX512-NEXT: vpblendvb %ymm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512-NEXT: retq
%tmp = select <32 x i1> %x, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %tmp
}
@@ -186,6 +219,19 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
+;
+; AVX512-LABEL: blendv_split:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
+; AVX512-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero
+; AVX512-NEXT: vpslld %xmm2, %ymm1, %ymm2
+; AVX512-NEXT: vpslld %xmm0, %ymm1, %ymm2 {%k1}
+; AVX512-NEXT: vmovdqu %ymm2, (%rdi)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%signbits = ashr <8 x i32> %cond, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%bool = trunc <8 x i32> %signbits to <8 x i1>
%shamt1 = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -262,6 +308,28 @@ define void @vselect_concat_splat() {
; AVX2-NEXT: vmovups %xmm2, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
+;
+; AVX512-LABEL: vselect_concat_splat:
+; AVX512: ## %bb.0: ## %entry
+; AVX512-NEXT: vmovups (%rax), %ymm0
+; AVX512-NEXT: vmovups (%rax), %xmm1
+; AVX512-NEXT: vmovaps {{.*#+}} xmm2 = [0,3,6,9]
+; AVX512-NEXT: vmovaps %ymm0, %ymm3
+; AVX512-NEXT: vpermt2ps %ymm1, %ymm2, %ymm3
+; AVX512-NEXT: vmovaps {{.*#+}} xmm4 = [1,4,7,10]
+; AVX512-NEXT: vpermt2ps %ymm1, %ymm4, %ymm0
+; AVX512-NEXT: vmovups 0, %ymm1
+; AVX512-NEXT: vmovups 32, %xmm5
+; AVX512-NEXT: vpermi2ps %ymm5, %ymm1, %ymm2
+; AVX512-NEXT: vpermt2ps %ymm5, %ymm4, %ymm1
+; AVX512-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX512-NEXT: vcmpneqps %xmm4, %xmm3, %k1
+; AVX512-NEXT: vmovaps %xmm2, %xmm3 {%k1}
+; AVX512-NEXT: vmovaps %xmm1, %xmm0 {%k1}
+; AVX512-NEXT: vmovups %xmm0, (%rax)
+; AVX512-NEXT: vmovups %xmm3, (%rax)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
entry:
%wide.vec = load <12 x float>, ptr undef, align 1
%strided.vec = shufflevector <12 x float> %wide.vec, <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
More information about the llvm-commits mailing list