[PATCH] D82778: [InstCombine] fma x, y, 0 -> fmul x, y

Tue Jun 30 06:28:36 PDT 2020

dmgreen updated this revision to Diff 274444.
dmgreen added a comment.

Thanks for the extra info. I ran things overnight and it said this for -0.0

  Processing fma.txt..
  
  ----------------------------------------
    %ys = fma half %x, half %y, half -0.000000
    ret half %ys
  =>
    %ys = fmul half %x, %y
    ret half %ys
  
  Done: 1
  Transformation seems to be correct!
  
  real    136m22.357s
  user    117m17.118s
  sys     0m7.495s

Which looks good. I've extended the pattern to match and added some tests.

Unfortunately still this for +0.0:

  Processing fma.txt..
  
  ----------------------------------------
    %ys = fma nsz half %x, half %y, half 0.000000
    ret half %ys
  =>
    %ys = fmul half %x, %y
    ret half %ys
  
  ERROR: SMT Error: smt tactic failed to show goal to be sat/unsat memout
  
  real    152m39.828s
  user    147m53.980s
  sys     0m6.067s

But I'm fairly confident it's OK given nsz and the result for -0.0.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82778/new/

https://reviews.llvm.org/D82778

Files:
  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
  llvm/test/Transforms/InstCombine/fma.ll


Index: llvm/test/Transforms/InstCombine/fma.ll
===================================================================

--- llvm/test/Transforms/InstCombine/fma.ll
+++ llvm/test/Transforms/InstCombine/fma.ll
@@ -372,7 +372,7 @@
 
 define float @fma_x_y_0_nsz(float %x, float %y) {
 ; CHECK-LABEL: @fma_x_y_0_nsz(
-; CHECK-NEXT:    [[FMA:%.*]] = call nsz float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float 0.000000e+00)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul nsz float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret float [[FMA]]
 ;
   %fma = call nsz float @llvm.fma.f32(float %x, float %y, float 0.0)
@@ -390,7 +390,7 @@
 
 define <8 x half> @fma_x_y_0_nsz_v(<8 x half> %x, <8 x half> %y) {
 ; CHECK-LABEL: @fma_x_y_0_nsz_v(
-; CHECK-NEXT:    [[FMA:%.*]] = call nsz <8 x half> @llvm.fma.v8f16(<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]], <8 x half> zeroinitializer)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul nsz <8 x half> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <8 x half> [[FMA]]
 ;
   %fma = call nsz <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> zeroinitializer)
@@ -408,7 +408,7 @@
 
 define float @fmuladd_x_y_0_nsz(float %x, float %y) {
 ; CHECK-LABEL: @fmuladd_x_y_0_nsz(
-; CHECK-NEXT:    [[FMA:%.*]] = call nsz float @llvm.fmuladd.f32(float [[X:%.*]], float [[Y:%.*]], float 0.000000e+00)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul nsz float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret float [[FMA]]
 ;
   %fma = call nsz float @llvm.fmuladd.f32(float %x, float %y, float 0.0)
@@ -417,7 +417,7 @@
 
 define float @fma_x_y_m0(float %x, float %y) {
 ; CHECK-LABEL: @fma_x_y_m0(
-; CHECK-NEXT:    [[FMA:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float -0.000000e+00)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret float [[FMA]]
 ;
   %fma = call float @llvm.fma.f32(float %x, float %y, float -0.0)
@@ -426,7 +426,7 @@
 
 define <8 x half> @fma_x_y_m0_v(<8 x half> %x, <8 x half> %y) {
 ; CHECK-LABEL: @fma_x_y_m0_v(
-; CHECK-NEXT:    [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]], <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul <8 x half> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <8 x half> [[FMA]]
 ;
   %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>)
@@ -435,7 +435,7 @@
 
 define float @fmuladd_x_y_m0(float %x, float %y) {
 ; CHECK-LABEL: @fmuladd_x_y_m0(
-; CHECK-NEXT:    [[FMA:%.*]] = call float @llvm.fmuladd.f32(float [[X:%.*]], float [[Y:%.*]], float -0.000000e+00)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret float [[FMA]]
 ;
   %fma = call float @llvm.fmuladd.f32(float %x, float %y, float -0.0)
Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2379,6 +2379,14 @@
       return FAdd;
     }
 
+    // fma x, y, 0 -> fmul x, y
+    // This is always valid for -0.0, but requires nsz for +0.0 as
+    // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
+    if (match(II->getArgOperand(2), m_NegZeroFP()) ||
+        (match(II->getArgOperand(2), m_PosZeroFP()) &&
+         II->getFastMathFlags().noSignedZeros()))
+      return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
+
     break;
   }
   case Intrinsic::copysign: {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D82778.274444.patch
Type: text/x-patch
Size: 3659 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200630/b549f234/attachment.bin>