[llvm] 778562a - [X86][AVX] Add v4i64 shift-by-32 tests
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed May 12 08:44:54 PDT 2021
Author: Simon Pilgrim
Date: 2021-05-12T16:42:18+01:00
New Revision: 778562ada39f5353b735c4ac204eddedb072a94b
URL: https://github.com/llvm/llvm-project/commit/778562ada39f5353b735c4ac204eddedb072a94b
DIFF: https://github.com/llvm/llvm-project/commit/778562ada39f5353b735c4ac204eddedb072a94b.diff
LOG: [X86][AVX] Add v4i64 shift-by-32 tests
AVX1 could perform this as a v8f32 shuffle instead of splitting - based off PR46621
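A rough IR sketch of the idea (not part of this commit; the function name is
made up for illustration): a logical shift by 32 only moves whole 32-bit
halves within each i64 lane, so it can be written as a single lane shuffle
against zero. That shuffle stays in the v8f32/float domain, which AVX1 could
lower across both 128-bit halves (e.g. VPERMILPS plus a blend with zero)
instead of splitting the 256-bit vector:

define <4 x i64> @lshr32_as_shuffle(<4 x i64> %a) {
  ; illustrative only - view the vector as eight 32-bit elements; in each
  ; i64 lane, element 2k is the low half and element 2k+1 the high half
  %bc = bitcast <4 x i64> %a to <8 x i32>
  ; lshr by 32: low half <- old high half, high half <- zero
  %r = shufflevector <8 x i32> %bc, <8 x i32> zeroinitializer,
                     <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
  %res = bitcast <8 x i32> %r to <4 x i64>
  ret <4 x i64> %res
}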
Added:
Modified:
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
llvm/test/CodeGen/X86/vector-shift-shl-256.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 81cadf70f446..0f1f23ed8e7d 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1659,3 +1659,77 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
%shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
+
+;
+; Special Cases
+;
+
+define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
+; AVX1-LABEL: shift32_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shift32_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
+; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: shift32_v4i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744073709551584,18446744073709551584]
+; XOPAVX1-NEXT: vpshaq %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshaq %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: shift32_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsrad $31, %ymm0, %ymm1
+; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: shift32_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT: vpsraq $32, %zmm0, %zmm0
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift32_v4i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsraq $32, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: shift32_v4i64:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
+; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X86-AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
+; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: shift32_v4i64:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
+; X86-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; X86-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; X86-AVX2-NEXT: retl
+ %shift = ashr <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
+ ret <4 x i64> %shift
+}
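An aside on the ashr output above (not part of the diff): unlike the logical
shifts, ashr-by-32 is not a pure shuffle, because the high half of each
result lane must be the sign extension of the old high half - which is what
the vpsrad $31 / vpshufd / vpblendw sequences compute per 128-bit half. A
minimal IR sketch of that decomposition (hypothetical function name):

define <4 x i64> @ashr32_decomposed(<4 x i64> %a) {
  ; illustrative only, not from the commit
  %bc = bitcast <4 x i64> %a to <8 x i32>
  ; replicate the sign bit of every 32-bit element (the vpsrad $31)
  %sign = ashr <8 x i32> %bc, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ; low half of each i64 lane <- old high half; high half <- its sign bits
  %r = shufflevector <8 x i32> %bc, <8 x i32> %sign,
                     <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %res = bitcast <8 x i32> %r to <4 x i64>
  ret <4 x i64> %res
}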
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 36d2470fac9d..9fd0960c16b2 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -1390,6 +1390,63 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
ret <32 x i8> %shift
}

+;
+; Special Cases
+;
+
+define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
+; AVX1-LABEL: shift32_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shift32_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: shift32_v4i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpsrlq $32, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: shift32_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: shift32_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift32_v4i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: shift32_v4i64:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm1
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: shift32_v4i64:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %shift = lshr <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
+ ret <4 x i64> %shift
+}
+
define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
; AVX1-LABEL: sh_trunc_sh_vec:
; AVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index da3cebc47586..0af23983b817 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -1298,3 +1298,60 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
%shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
+
+;
+; Special Cases
+;
+
+define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
+; AVX1-LABEL: shift32_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsllq $32, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shift32_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: shift32_v4i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpsllq $32, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpsllq $32, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: shift32_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: shift32_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift32_v4i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: shift32_v4i64:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpsllq $32, %xmm0, %xmm1
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: shift32_v4i64:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %shift = shl <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
+ ret <4 x i64> %shift
+}
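For completeness (again not part of the commit): the shl case mirrors the
lshr shuffle with the halves swapped - zero fills the low half of each lane
and the old low half moves up. A minimal IR sketch (hypothetical name):

define <4 x i64> @shl32_as_shuffle(<4 x i64> %a) {
  ; illustrative only, not from the commit
  %bc = bitcast <4 x i64> %a to <8 x i32>
  ; shl by 32: low half <- zero, high half <- old low half
  %r = shufflevector <8 x i32> %bc, <8 x i32> zeroinitializer,
                     <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
  %res = bitcast <8 x i32> %r to <4 x i64>
  ret <4 x i64> %res
}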