[llvm] fccd796 - [X86] Add tests showing failure of combineVectorCompareAndMaskUnaryOp to handle 'all-bits' general case
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat May 9 06:25:07 PDT 2020
Author: Simon Pilgrim
Date: 2020-05-09T14:24:38+01:00
New Revision: fccd7965657505de85c5d747f1c33638ae916b0f
URL: https://github.com/llvm/llvm-project/commit/fccd7965657505de85c5d747f1c33638ae916b0f
DIFF: https://github.com/llvm/llvm-project/commit/fccd7965657505de85c5d747f1c33638ae916b0f.diff
LOG: [X86] Add tests showing failure of combineVectorCompareAndMaskUnaryOp to handle 'all-bits' general case
For the sint_to_fp(and(X,C)) -> and(X,sint_to_fp(C)) fold, combineVectorCompareAndMaskUnaryOp only matches X against a SETCC node (which produces an all-bits result), when it could really accept anything that ComputeNumSignBits reports as all sign bits.
Noticed while investigating mask promotion issues in PR45808.
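For context, the generalization the log describes would amount to replacing the explicit SETCC opcode match with a sign-bit query on the mask operand. A minimal sketch of such a check (a hypothetical helper, not the actual combineVectorCompareAndMaskUnaryOp code) could look like:

    #include "llvm/CodeGen/SelectionDAG.h"

    using namespace llvm;

    // Hypothetical helper: true if every lane of Mask is known to be
    // all-ones or all-zero. ComputeNumSignBits equal to the element width
    // means each element is 0 or -1, which is the property the fold relies
    // on, whether the mask comes from a SETCC, a sext, or mask arithmetic.
    static bool isAllBitsOrZeroMask(SelectionDAG &DAG, SDValue Mask) {
      unsigned EltBits = Mask.getValueType().getScalarSizeInBits();
      return DAG.ComputeNumSignBits(Mask) == EltBits;
    }

The foo5/foo6 tests added below are examples of masks with this all-bits property that the current opcode-based match misses.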
Added:
Modified:
llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
index 2e756e15744c..0274a8b45826 100644
--- a/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
+++ b/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
@@ -96,3 +96,47 @@ define void @foo4(<4 x float>* noalias %result) nounwind {
store <4 x float> %val, <4 x float>* %result
ret void
}
+
+; TODO: Test when we're masking against a sign extended setcc.
+define <4 x float> @foo5(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: LCPI5_0:
+; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 0 ## 0x0
+; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 0 ## 0x0
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = icmp sgt <4 x i32> %a0, %a1
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ %3 = and <4 x i32> %2, <i32 1, i32 0, i32 1, i32 0>
+ %4 = uitofp <4 x i32> %3 to <4 x float>
+ ret <4 x float> %4
+}
+
+; TODO: Test when we're masking against mask arithmetic, not the setcc's directly.
+define <4 x float> @foo6(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: LCPI6_0:
+; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 0 ## 0x0
+; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 0 ## 0x0
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = icmp sgt <4 x i32> %a0, %a1
+ %2 = icmp sgt <4 x i32> %a0, zeroinitializer
+ %3 = and <4 x i1> %1, %2
+ %4 = sext <4 x i1> %3 to <4 x i32>
+ %5 = and <4 x i32> %4, <i32 1, i32 0, i32 1, i32 0>
+ %6 = uitofp <4 x i32> %5 to <4 x float>
+ ret <4 x float> %6
+}
\ No newline at end of file