[llvm] c9e8c9e - [InstCombine] fold fmul/fdiv with fabs operands

Thu Jun 25 08:36:37 PDT 2020

Author: Sanjay Patel
Date: 2020-06-25T11:35:38-04:00
New Revision: c9e8c9e3ea2681dc18dd3a2d43d6aa8f37252649

URL: https://github.com/llvm/llvm-project/commit/c9e8c9e3ea2681dc18dd3a2d43d6aa8f37252649
DIFF: https://github.com/llvm/llvm-project/commit/c9e8c9e3ea2681dc18dd3a2d43d6aa8f37252649.diff

LOG: [InstCombine] fold fmul/fdiv with fabs operands

fabs(X) * fabs(Y) --> fabs(X * Y)
fabs(X) / fabs(Y) --> fabs(X / Y)

If both operands of fmul/fdiv are positive, then the result must be positive.

There's a NAN corner-case that prevents removing the more specific fold just
above this one:
fabs(X) * fabs(X) -> X * X
That fold works even with NAN because the sign-bit result of the multiply is
not specified if X is NAN.

We can't remove that and use the more general fold that is proposed here
because once we convert to this:
fabs (X * X)
...it is not legal to simplify the 'fabs' out of that expression when X is NAN.
That's because fabs() guarantees that the sign-bit is always cleared - even
for NAN values.

So this patch has the potential to lose information, but it seems unlikely if
we do the more specific fold ahead of this one.

Differential Revision: https://reviews.llvm.org/D82277

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineInternal.h
    llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
    llvm/test/Transforms/InstCombine/fdiv.ll
    llvm/test/Transforms/InstCombine/fmul.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index d256e77f9a59..8caf580a3fd6 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -635,6 +635,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   Value *getSelectCondition(Value *A, Value *B);
 
   Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
+  Instruction *foldFPSignBitOps(BinaryOperator &I);
 
 public:
   /// Inserts an instruction \p New before instruction \p Old

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cbbde9a1aeb8..2d4fdd3eeb40 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -402,7 +402,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   return Changed ? &I : nullptr;
 }
 
-static Instruction *foldFPSignBitOps(BinaryOperator &I) {
+Instruction *InstCombiner::foldFPSignBitOps(BinaryOperator &I) {
   BinaryOperator::BinaryOps Opcode = I.getOpcode();
   assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
          "Expected fmul or fdiv");
@@ -420,6 +420,19 @@ static Instruction *foldFPSignBitOps(BinaryOperator &I) {
   if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))))
     return BinaryOperator::CreateWithCopiedFlags(Opcode, X, X, &I);
 
+  // fabs(X) * fabs(Y) --> fabs(X * Y)
+  // fabs(X) / fabs(Y) --> fabs(X / Y)
+  if (match(Op0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))) &&
+      match(Op1, m_Intrinsic<Intrinsic::fabs>(m_Value(Y))) &&
+      (Op0->hasOneUse() || Op1->hasOneUse())) {
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+    Builder.setFastMathFlags(I.getFastMathFlags());
+    Value *XY = Builder.CreateBinOp(Opcode, X, Y);
+    Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, XY);
+    Fabs->takeName(&I);
+    return replaceInstUsesWith(I, Fabs);
+  }
+
   return nullptr;
 }
 

diff  --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index deceb724007e..7b1a556439b7 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -598,9 +598,8 @@ define float @fabs_same_op_extra_use(float %x) {
 
 define float @fabs_fabs(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs(
-; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
-; CHECK-NEXT:    [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = call float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
@@ -613,8 +612,8 @@ define float @fabs_fabs_extra_use1(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use1(
 ; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
 ; CHECK-NEXT:    call void @use_f32(float [[X_FABS]])
-; CHECK-NEXT:    [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv ninf float [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = call ninf float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
@@ -626,10 +625,10 @@ define float @fabs_fabs_extra_use1(float %x, float %y) {
 
 define float @fabs_fabs_extra_use2(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use2(
-; CHECK-NEXT:    [[X_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
 ; CHECK-NEXT:    [[Y_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[Y:%.*]])
 ; CHECK-NEXT:    call void @use_f32(float [[Y_FABS]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc ninf float [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = call reassoc ninf float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %x.fabs = call fast float @llvm.fabs.f32(float %x)
@@ -639,6 +638,8 @@ define float @fabs_fabs_extra_use2(float %x, float %y) {
   ret float %r
 }
 
+; negative test - don't create an extra instruction
+
 define float @fabs_fabs_extra_use3(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use3(
 ; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])

diff  --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 0559a7907de0..8e168f252978 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -482,9 +482,8 @@ define float @fabs_squared_fast(float %x) {
 
 define float @fabs_fabs(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs(
-; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
-; CHECK-NEXT:    [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = call float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[MUL]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
@@ -497,8 +496,8 @@ define float @fabs_fabs_extra_use1(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use1(
 ; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
 ; CHECK-NEXT:    call void @use_f32(float [[X_FABS]])
-; CHECK-NEXT:    [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT:    [[MUL:%.*]] = fmul ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul ninf float [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = call ninf float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[MUL]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
@@ -510,10 +509,10 @@ define float @fabs_fabs_extra_use1(float %x, float %y) {
 
 define float @fabs_fabs_extra_use2(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use2(
-; CHECK-NEXT:    [[X_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
 ; CHECK-NEXT:    [[Y_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[Y:%.*]])
 ; CHECK-NEXT:    call void @use_f32(float [[Y_FABS]])
-; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc ninf float [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[MUL:%.*]] = call reassoc ninf float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[MUL]]
 ;
   %x.fabs = call fast float @llvm.fabs.f32(float %x)
@@ -523,6 +522,8 @@ define float @fabs_fabs_extra_use2(float %x, float %y) {
   ret float %mul
 }
 
+; negative test - don't create an extra instruction
+
 define float @fabs_fabs_extra_use3(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use3(
 ; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])