[llvm] 0a1210e - [InstSimplify] try harder to fold fmul with 0.0 operand

Tue Oct 4 08:20:10 PDT 2022

Author: Sanjay Patel
Date: 2022-10-04T11:20:01-04:00
New Revision: 0a1210e482c7d8bd8d4bf1740a5175b29d05da17

URL: https://github.com/llvm/llvm-project/commit/0a1210e482c7d8bd8d4bf1740a5175b29d05da17
DIFF: https://github.com/llvm/llvm-project/commit/0a1210e482c7d8bd8d4bf1740a5175b29d05da17.diff

LOG: [InstSimplify] try harder to fold fmul with 0.0 operand

https://alive2.llvm.org/ce/z/oShzr3

This was noted as a missing fold in D134876 (with additional
examples based on issue #58046).

I'm assuming that fmul with a zero operand is rare enough
that the use of ValueTracking will not noticeably increase
compile-time.

This adjusts a PowerPC codegen test that was added with D88388
because it would get folded away and no longer provide coverage
for the bug fix.

Added: 
    

Modified: 
    llvm/lib/Analysis/InstructionSimplify.cpp
    llvm/test/CodeGen/PowerPC/pr47660.ll
    llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
    llvm/test/Transforms/InstSimplify/floating-point-compare.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 786ed03d476aa..7b2298443049a 100644

--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5333,9 +5333,16 @@ static Value *simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
   if (match(Op1, m_FPOne()))
     return Op0;
 
-  // X * 0.0 --> 0.0 (with nnan and nsz)
-  if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP()))
-    return ConstantFP::getNullValue(Op0->getType());
+  if (match(Op1, m_AnyZeroFP())) {
+    // X * 0.0 --> 0.0 (with nnan and nsz)
+    if (FMF.noNaNs() && FMF.noSignedZeros())
+      return ConstantFP::getNullValue(Op0->getType());
+
+    // +normal number * (-)0.0 --> (-)0.0
+    if (isKnownNeverInfinity(Op0, Q.TLI) && isKnownNeverNaN(Op0, Q.TLI) &&
+        SignBitMustBeZero(Op0, Q.TLI))
+      return Op1;
+  }
 
   // sqrt(X) * sqrt(X) --> X, if we can:
   // 1. Remove the intermediate rounding (reassociate).

diff  --git a/llvm/test/CodeGen/PowerPC/pr47660.ll b/llvm/test/CodeGen/PowerPC/pr47660.ll
index 79f185d1f7b5c..05571d3de8c81 100644
--- a/llvm/test/CodeGen/PowerPC/pr47660.ll
+++ b/llvm/test/CodeGen/PowerPC/pr47660.ll
@@ -6,12 +6,13 @@
 ; RUN:   -mtriple=powerpc64-linux-gnu < %s | FileCheck \
 ; RUN:   -check-prefix=CHECK-BE %s
 
-define dso_local i24 @_Z1f1c(i24 %g.coerce) local_unnamed_addr #0 {
+define i8 @_Z1f1c(i24 %x) #0 {
 ; CHECK-LE-LABEL: _Z1f1c:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    clrlwi r3, r3, 24
-; CHECK-LE-NEXT:    xxlxor f1, f1, f1
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    clrlwi r3, r3, 8
 ; CHECK-LE-NEXT:    mtfprwz f0, r3
+; CHECK-LE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-LE-NEXT:    lfd f1, .LCPI0_0@toc@l(r3)
 ; CHECK-LE-NEXT:    xscvuxddp f0, f0
 ; CHECK-LE-NEXT:    xsmuldp f0, f0, f1
 ; CHECK-LE-NEXT:    xscvdpsxws f0, f0
@@ -19,25 +20,22 @@ define dso_local i24 @_Z1f1c(i24 %g.coerce) local_unnamed_addr #0 {
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: _Z1f1c:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    clrldi r3, r3, 56
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    clrldi r3, r3, 40
 ; CHECK-BE-NEXT:    std r3, -16(r1)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-NEXT:    lfd f0, -16(r1)
-; CHECK-BE-NEXT:    lfs f1, .LCPI0_0@toc@l(r3)
+; CHECK-BE-NEXT:    lfd f1, .LCPI0_0@toc@l(r3)
 ; CHECK-BE-NEXT:    fcfid f0, f0
 ; CHECK-BE-NEXT:    fmul f0, f0, f1
 ; CHECK-BE-NEXT:    fctiwz f0, f0
 ; CHECK-BE-NEXT:    stfd f0, -8(r1)
 ; CHECK-BE-NEXT:    lwz r3, -4(r1)
 ; CHECK-BE-NEXT:    blr
-entry:
-  %0 = and i24 %g.coerce, 255
-  %conv1 = uitofp i24 %0 to double
-  %mul = fmul double 0.000000e+00, %conv1
-  %conv2 = fptoui double %mul to i8
-  %retval.sroa.0.0.insert.ext = zext i8 %conv2 to i24
-  ret i24 %retval.sroa.0.0.insert.ext
+  %conv1 = uitofp i24 %x to double
+  %mul = fmul double 0.1, %conv1
+  %r = fptoui double %mul to i8
+  ret i8 %r
 }
 
 attributes #0 = { "use-soft-float"="false" }

diff  --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index 3b7bfef9606b0..d82e3bc3fada3 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -172,9 +172,7 @@ define double @fmul_X_1(double %a) {
 
 define half @fmul_nnan_ninf_nneg_0.0(i15 %x) {
 ; CHECK-LABEL: @fmul_nnan_ninf_nneg_0.0(
-; CHECK-NEXT:    [[F:%.*]] = uitofp i15 [[X:%.*]] to half
-; CHECK-NEXT:    [[R:%.*]] = fmul half [[F]], 0xH0000
-; CHECK-NEXT:    ret half [[R]]
+; CHECK-NEXT:    ret half 0xH0000
 ;
   %f = uitofp i15 %x to half
   %r = fmul half %f, 0.0
@@ -183,15 +181,15 @@ define half @fmul_nnan_ninf_nneg_0.0(i15 %x) {
 
 define half @fmul_nnan_ninf_nneg_n0.0(i15 %x) {
 ; CHECK-LABEL: @fmul_nnan_ninf_nneg_n0.0(
-; CHECK-NEXT:    [[F:%.*]] = uitofp i15 [[X:%.*]] to half
-; CHECK-NEXT:    [[R:%.*]] = fmul half [[F]], 0xH8000
-; CHECK-NEXT:    ret half [[R]]
+; CHECK-NEXT:    ret half 0xH8000
 ;
   %f = uitofp i15 %x to half
   %r = fmul half %f, -0.0
   ret half %r
 }
 
+; negative test - the int could be big enough to round to INF
+
 define half @fmul_nnan_nneg_0.0(i16 %x) {
 ; CHECK-LABEL: @fmul_nnan_nneg_0.0(
 ; CHECK-NEXT:    [[F:%.*]] = uitofp i16 [[X:%.*]] to half
@@ -205,10 +203,7 @@ define half @fmul_nnan_nneg_0.0(i16 %x) {
 
 define double @fmul_nnan_ninf_nneg_n0.0_commute(i127 %x) {
 ; CHECK-LABEL: @fmul_nnan_ninf_nneg_n0.0_commute(
-; CHECK-NEXT:    [[F:%.*]] = uitofp i127 [[X:%.*]] to float
-; CHECK-NEXT:    [[E:%.*]] = fpext float [[F]] to double
-; CHECK-NEXT:    [[R:%.*]] = fmul double -0.000000e+00, [[E]]
-; CHECK-NEXT:    ret double [[R]]
+; CHECK-NEXT:    ret double -0.000000e+00
 ;
   %f = uitofp i127 %x to float
   %e = fpext float %f to double
@@ -216,6 +211,8 @@ define double @fmul_nnan_ninf_nneg_n0.0_commute(i127 %x) {
   ret double %r
 }
 
+; negative test - the int could be big enough to round to INF
+
 define double @fmul_nnan_ninf_nneg_0.0_commute(i128 %x) {
 ; CHECK-LABEL: @fmul_nnan_ninf_nneg_0.0_commute(
 ; CHECK-NEXT:    [[F:%.*]] = uitofp i128 [[X:%.*]] to float

diff  --git a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll
index df0ad624f2084..7dc3842997b1c 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll
@@ -999,14 +999,9 @@ define <2 x i1> @known_positive_une_with_negative_constant_splat_vec(<2 x i32> %
   ret <2 x i1> %cmp
 }
 
-; TODO: This could fold to true.
 define i1 @pr58046(i64 %arg) {
 ; CHECK-LABEL: @pr58046(
-; CHECK-NEXT:    [[FP:%.*]] = uitofp i64 [[ARG:%.*]] to double
-; CHECK-NEXT:    [[MUL:%.*]] = fmul double -0.000000e+00, [[FP]]
-; CHECK-NEXT:    [[DIV:%.*]] = fdiv double 1.000000e+00, [[MUL]]
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq double [[DIV]], 0xFFF0000000000000
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
   %fp = uitofp i64 %arg to double
   %mul = fmul double -0.000000e+00, %fp