[llvm] fd42a4a - [X86][SSE] Add add(shl(and(x, c1), c2), c3) test case with non-uniform shift value
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 14 03:42:10 PST 2020
Author: Simon Pilgrim
Date: 2020-01-14T11:41:47Z
New Revision: fd42a4ac7a69adb92f87c7fa927509f177dcc6ca
URL: https://github.com/llvm/llvm-project/commit/fd42a4ac7a69adb92f87c7fa927509f177dcc6ca
DIFF: https://github.com/llvm/llvm-project/commit/fd42a4ac7a69adb92f87c7fa927509f177dcc6ca.diff
LOG: [X86][SSE] Add add(shl(and(x,c1),c2),c3) test case with non-uniform shift value
As mentioned by @nikic on rGef5debac4302, we should merge the "guaranteed top zero bits from the shifted value" and "minimum shift amount" code paths so that both can contribute to setting the high bits to zero.
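
For context, the known-bits argument the test probes is easy to sketch by hand: the and with 0xFFFF0000 clears the low 16 bits of each lane, and every per-lane shift amount is at least 2, so the low 18 bits of the shl result are provably zero and the trailing add of 15 could be emitted as an or. The SSE41 checks below already show this (por), while the non-uniform vpsllvd path still emits vpaddd. The standalone C++ sketch below only illustrates that merge of the value's known zero bits with the shift amounts; it is not LLVM's implementation, and the helper knownZeroOfShl and its names are hypothetical.

#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Conservative known-zero mask of (v << s) over a set of constant per-lane
// shift amounts, given a known-zero mask for v (illustrative helper only).
uint32_t knownZeroOfShl(uint32_t KnownZeroVal,
                        std::initializer_list<unsigned> ShiftAmts) {
  uint32_t Known = ~0u; // start from "every bit known zero" and intersect
  for (unsigned S : ShiftAmts) {
    uint32_t LowZeros = (1u << S) - 1;       // bits vacated by the shift
    Known &= (KnownZeroVal << S) | LowZeros; // value zeros shifted up + vacated zeros
  }
  return Known;
}

int main() {
  // and(x, 0xFFFF0000): the low 16 bits of each lane are known zero.
  uint32_t KnownZeroAnd = 0x0000FFFFu;
  // shl by the non-uniform per-lane amounts <2,3,4,5> from the test.
  uint32_t KnownZeroShl = knownZeroOfShl(KnownZeroAnd, {2, 3, 4, 5});
  std::printf("known zero after shl: 0x%08X\n", KnownZeroShl); // 0x0003FFFF
  // 15 only overlaps known-zero bits, so add(shl(...), 15) can be an or.
  bool AddIsOr = (15u & ~KnownZeroShl) == 0;
  std::printf("add 15 foldable to or: %s\n", AddIsOr ? "yes" : "no"); // yes
  return 0;
}

Intersecting over all per-lane shift amounts keeps the result conservative, which is what lets the shifted value's own zero bits and the minimum shift amount both contribute.
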
Added:
Modified:
llvm/test/CodeGen/X86/combine-shl.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index 0fb4d67fd885..ae31dc41e343 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -865,6 +865,43 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) {
ret <4 x i32> %2
}
+define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
+; SSE2-LABEL: combine_vec_add_shl_and_nonsplat:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4,8,16,32]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: combine_vec_add_shl_and_nonsplat:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
+; SSE41-NEXT: por {{.*}}(%rip), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: combine_vec_add_shl_and_nonsplat:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = and <4 x i32> %a0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
+ %2 = shl <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>
+ %3 = add <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
+ ret <4 x i32> %3
+}
+
define <4 x i32> @combine_vec_add_shuffle_shl(<4 x i32> %a0) {
; SSE2-LABEL: combine_vec_add_shuffle_shl:
; SSE2: # %bb.0: