[llvm] r332869 - [X86] Add test cases for missed vector rotate matching due to SimplifyDemandedBits interfering with the AND masks
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon May 21 12:27:50 PDT 2018
Author: ctopper
Date: Mon May 21 12:27:50 2018
New Revision: 332869
URL: http://llvm.org/viewvc/llvm-project?rev=332869&view=rev
Log:
[X86] Add test cases for missed vector rotate matching due to SimplifyDemandedBits interfering with the AND masks
As requested in D47116
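
For context, the new tests build a rotate out of two separate shifts whose amounts are pre-masked with an AND (30 or 23 instead of the usual 31). A rough scalar C sketch of the same idiom is below; it is only my own illustration of the pattern being tested, not code taken from this commit or from D47116:

    #include <stdint.h>

    /* Rotate x left by (y & 30).  Because the amount is even, a right shift by
     * ((0 - amt) & 30) is the matching complementary shift, so the expression
     * as a whole is still a plain rotate.  Per the commit subject, it is
     * SimplifyDemandedBits rewriting these AND masks that keeps the rotate
     * pattern from being matched for the vector versions below. */
    uint32_t rotate_demanded_bits(uint32_t x, uint32_t y) {
      uint32_t amt = y & 30;
      return (x << amt) | (x >> ((0u - amt) & 30));
    }
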
Modified:
llvm/trunk/test/CodeGen/X86/combine-rotates.ll
Modified: llvm/trunk/test/CodeGen/X86/combine-rotates.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-rotates.ll?rev=332869&r1=332868&r2=332869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-rotates.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-rotates.ll Mon May 21 12:27:50 2018
@@ -57,3 +57,117 @@ define <4 x i32> @combine_vec_rot_rot_sp
%6 = or <4 x i32> %4, %5
ret <4 x i32> %6
}
+
+define <4 x i32> @rotate_demanded_bits(<4 x i32>, <4 x i32>) {
+; CHECK-LABEL: rotate_demanded_bits:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andb $30, %sil
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: roll %cl, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+; XOP-LABEL: rotate_demanded_bits:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [30,30,30,30]
+; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm3, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: rotate_demanded_bits:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [30,30,30,30]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm3
+; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %3 = and <4 x i32> %1, <i32 30, i32 30, i32 30, i32 30>
+ %4 = shl <4 x i32> %0, %3
+ %5 = sub nsw <4 x i32> zeroinitializer, %3
+ %6 = and <4 x i32> %5, <i32 30, i32 30, i32 30, i32 30>
+ %7 = lshr <4 x i32> %0, %6
+ %8 = or <4 x i32> %7, %4
+ ret <4 x i32> %8
+}
+
+define <4 x i32> @rotate_demanded_bits_2(<4 x i32>, <4 x i32>) {
+; CHECK-LABEL: rotate_demanded_bits_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andb $23, %sil
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: roll %cl, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+; XOP-LABEL: rotate_demanded_bits_2:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: rotate_demanded_bits_2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512-NEXT: vprolvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %3 = and <4 x i32> %1, <i32 23, i32 23, i32 23, i32 23>
+ %4 = shl <4 x i32> %0, %3
+ %5 = sub nsw <4 x i32> zeroinitializer, %3
+ %6 = and <4 x i32> %5, <i32 31, i32 31, i32 31, i32 31>
+ %7 = lshr <4 x i32> %0, %6
+ %8 = or <4 x i32> %7, %4
+ ret <4 x i32> %8
+}
+
+define <4 x i32> @rotate_demanded_bits_3(<4 x i32>, <4 x i32>) {
+; CHECK-LABEL: rotate_demanded_bits_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addb %sil, %sil
+; CHECK-NEXT: andb $30, %sil
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: roll %cl, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+; XOP-LABEL: rotate_demanded_bits_3:
+; XOP: # %bb.0:
+; XOP-NEXT: vpaddd %xmm1, %xmm1, %xmm1
+; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [30,30,30,30]
+; XOP-NEXT: vpand %xmm2, %xmm1, %xmm3
+; XOP-NEXT: vpshld %xmm3, %xmm0, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm0, %xmm3, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: rotate_demanded_bits_3:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [30,30,30,30]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512-NEXT: vpsllvd %xmm3, %xmm0, %xmm3
+; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX512-NEXT: retq
+ %3 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+ %4 = and <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
+ %5 = shl <4 x i32> %0, %4
+ %6 = sub <4 x i32> zeroinitializer, %3
+ %7 = and <4 x i32> %6, <i32 30, i32 30, i32 30, i32 30>
+ %8 = lshr <4 x i32> %0, %7
+ %9 = or <4 x i32> %5, %8
+ ret <4 x i32> %9
+}