[llvm] 9e49d1d - [InstCombine] fma x, y, 0 -> fmul x, y

Tue Jun 30 12:00:36 PDT 2020

Author: David Green
Date: 2020-06-30T19:56:37+01:00
New Revision: 9e49d1d9b8702a568baf82e2549e246197ecc334

URL: https://github.com/llvm/llvm-project/commit/9e49d1d9b8702a568baf82e2549e246197ecc334
DIFF: https://github.com/llvm/llvm-project/commit/9e49d1d9b8702a568baf82e2549e246197ecc334.diff

LOG: [InstCombine] fma x, y, 0 -> fmul x, y

If the addend of the fma is zero, common sense would suggest that we can
convert fma x, y, 0.0 to fmul x, y. This comes up in some user code
that was expecting the first fma in an unrolled loop to simplify to an
fmul.

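Floating point often does not follow naive common sense, though. Alive
suggests that the fold with a +0.0 addend should be guarded by nsz: when
x * y is -0.0, fadd -0.0, 0.0 = 0.0, whereas the fmul on its own would
give -0.0. Folding fma x, y, -0.0 to fmul x, y is always valid.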

Differential Revision: https://reviews.llvm.org/D82778

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/test/Transforms/InstCombine/fma.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5c576bf7f079..ad06020dadf1 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2379,6 +2379,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return FAdd;
     }
 
+    // fma x, y, 0 -> fmul x, y
+    // This is always valid for -0.0, but requires nsz for +0.0 as
+    // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
+    if (match(II->getArgOperand(2), m_NegZeroFP()) ||
+        (match(II->getArgOperand(2), m_PosZeroFP()) &&
+         II->getFastMathFlags().noSignedZeros()))
+      return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
+
     break;
   }
   case Intrinsic::copysign: {

diff  --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index a619d77a010e..6d9f8ea694c6 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -372,7 +372,7 @@ define float @fma_x_y_0(float %x, float %y) {
 
 define float @fma_x_y_0_nsz(float %x, float %y) {
 ; CHECK-LABEL: @fma_x_y_0_nsz(
-; CHECK-NEXT:    [[FMA:%.*]] = call nsz float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float 0.000000e+00)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul nsz float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret float [[FMA]]
 ;
   %fma = call nsz float @llvm.fma.f32(float %x, float %y, float 0.0)
@@ -390,7 +390,7 @@ define <8 x half> @fma_x_y_0_v(<8 x half> %x, <8 x half> %y) {
 
 define <8 x half> @fma_x_y_0_nsz_v(<8 x half> %x, <8 x half> %y) {
 ; CHECK-LABEL: @fma_x_y_0_nsz_v(
-; CHECK-NEXT:    [[FMA:%.*]] = call nsz <8 x half> @llvm.fma.v8f16(<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]], <8 x half> zeroinitializer)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul nsz <8 x half> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <8 x half> [[FMA]]
 ;
   %fma = call nsz <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> zeroinitializer)
@@ -408,7 +408,7 @@ define float @fmuladd_x_y_0(float %x, float %y) {
 
 define float @fmuladd_x_y_0_nsz(float %x, float %y) {
 ; CHECK-LABEL: @fmuladd_x_y_0_nsz(
-; CHECK-NEXT:    [[FMA:%.*]] = call nsz float @llvm.fmuladd.f32(float [[X:%.*]], float [[Y:%.*]], float 0.000000e+00)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul nsz float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret float [[FMA]]
 ;
   %fma = call nsz float @llvm.fmuladd.f32(float %x, float %y, float 0.0)
@@ -417,7 +417,7 @@ define float @fmuladd_x_y_0_nsz(float %x, float %y) {
 
 define float @fma_x_y_m0(float %x, float %y) {
 ; CHECK-LABEL: @fma_x_y_m0(
-; CHECK-NEXT:    [[FMA:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float -0.000000e+00)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret float [[FMA]]
 ;
   %fma = call float @llvm.fma.f32(float %x, float %y, float -0.0)
@@ -426,7 +426,7 @@ define float @fma_x_y_m0(float %x, float %y) {
 
 define <8 x half> @fma_x_y_m0_v(<8 x half> %x, <8 x half> %y) {
 ; CHECK-LABEL: @fma_x_y_m0_v(
-; CHECK-NEXT:    [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]], <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul <8 x half> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <8 x half> [[FMA]]
 ;
   %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>)
@@ -435,7 +435,7 @@ define <8 x half> @fma_x_y_m0_v(<8 x half> %x, <8 x half> %y) {
 
 define float @fmuladd_x_y_m0(float %x, float %y) {
 ; CHECK-LABEL: @fmuladd_x_y_m0(
-; CHECK-NEXT:    [[FMA:%.*]] = call float @llvm.fmuladd.f32(float [[X:%.*]], float [[Y:%.*]], float -0.000000e+00)
+; CHECK-NEXT:    [[FMA:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret float [[FMA]]
 ;
   %fma = call float @llvm.fmuladd.f32(float %x, float %y, float -0.0)