[llvm] 63e338b - [X86][SSE] Show isNegatibleForFree inability to peek through X86ISD::FRCP

Sat Feb 8 02:42:17 PST 2020

Author: Simon Pilgrim
Date: 2020-02-08T10:40:49Z
New Revision: 63e338be2cc6a4ab7a95ca7cf264928d41778ff4

URL: https://github.com/llvm/llvm-project/commit/63e338be2cc6a4ab7a95ca7cf264928d41778ff4
DIFF: https://github.com/llvm/llvm-project/commit/63e338be2cc6a4ab7a95ca7cf264928d41778ff4.diff

LOG: [X86][SSE] Show isNegatibleForFree inability to peek through X86ISD::FRCP

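Negating the input of RCP is safe, but isNegatibleForFree currently can't peek through the X86ISD::FRCP node, so the explicit negation (the vxorps in the checks below) is still emitted ahead of the vrcpps.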

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/fma-fneg-combine-2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll b/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll
index cf33c0c8c427..3aadee8a317e 100644

--- a/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll
+++ b/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll
@@ -86,6 +86,28 @@ entry:
   ret float %1
 }
 
+define <4 x float> @test_fma_rcp_fneg_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z)  {
+; FMA3-LABEL: test_fma_rcp_fneg_v4f32:
+; FMA3:       # %bb.0: # %entry
+; FMA3-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
+; FMA3-NEXT:    vrcpps %xmm2, %xmm2
+; FMA3-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; FMA3-NEXT:    retq
+;
+; FMA4-LABEL: test_fma_rcp_fneg_v4f32:
+; FMA4:       # %bb.0: # %entry
+; FMA4-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
+; FMA4-NEXT:    vrcpps %xmm2, %xmm2
+; FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    retq
+entry:
+  %0 = fneg <4 x float> %z
+  %1 = tail call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %0)
+  %2 = tail call nsz <4 x float> @llvm.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %1)
+  ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>)
+
 ; This would crash while trying getNegatedExpression().
 
 define float @negated_constant(float %x) {
@@ -107,3 +129,4 @@ define float @negated_constant(float %x) {
 }
 
 declare float @llvm.fma.f32(float, float, float)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)