[llvm] ce9f2ab - [X86] Add mul(x,x) self-multiply known bits test coverage (PR48683)

Wed Jan 19 09:39:47 PST 2022

Author: Simon Pilgrim
Date: 2022-01-19T17:39:32Z
New Revision: ce9f2ab0cc4dc4d65dc37cecc14738dcc0a40411

URL: https://github.com/llvm/llvm-project/commit/ce9f2ab0cc4dc4d65dc37cecc14738dcc0a40411
DIFF: https://github.com/llvm/llvm-project/commit/ce9f2ab0cc4dc4d65dc37cecc14738dcc0a40411.diff

LOG: [X86] Add mul(x,x) self-multiply known bits test coverage (PR48683)

D108992 added self-multiply handling to KnownBits::mul, but we don't use it yet.

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/combine-mul.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll
index fb41347b13136..bfa9da5a95a9b 100644

--- a/llvm/test/CodeGen/X86/combine-mul.ll
+++ b/llvm/test/CodeGen/X86/combine-mul.ll
@@ -361,6 +361,45 @@ define <2 x i64> @combine_mul_to_abs_v2i64(<2 x i64> %x) {
   ret <2 x i64> %m
 }
 
+; 'Quadratic Reciprocity' - bit 1 of x*x is always zero, so and(mul(x,x),2) -> 0
+
+define i64 @combine_mul_self_knownbits(i64 %x) {
+; SSE-LABEL: combine_mul_self_knownbits:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq %rdi, %rax
+; SSE-NEXT:    imull %eax, %eax
+; SSE-NEXT:    andl $2, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_mul_self_knownbits:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq %rdi, %rax
+; AVX-NEXT:    imull %eax, %eax
+; AVX-NEXT:    andl $2, %eax
+; AVX-NEXT:    retq
+  %1 = mul i64 %x, %x ; self-multiply: both operands are the same value
+  %2 = and i64 %1, 2 ; mask keeps only bit 1 of the square
+  ret i64 %2
+}
+
+define <4 x i32> @combine_mul_self_knownbits_vector(<4 x i32> %x) {
+; SSE-LABEL: combine_mul_self_knownbits_vector:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pmulld %xmm0, %xmm0
+; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_mul_self_knownbits_vector:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpmulld %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = mul <4 x i32> %x, %x ; per-lane self-multiply: both operands are the same vector
+  %2 = and <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2> ; splat mask keeps only bit 1 of each lane
+  ret <4 x i32> %2
+}
+
 ; This would infinite loop because DAGCombiner wants to turn this into a shift,
 ; but x86 lowering wants to avoid non-uniform vector shift amounts.