[llvm] b40fde0 - [InstCombine] fold fdiv with pow divisor (PR49147)

Sun Feb 14 05:19:53 PST 2021

Author: Sanjay Patel
Date: 2021-02-14T08:07:36-05:00
New Revision: b40fde062c306584d88243de11175187e754ce3b

URL: https://github.com/llvm/llvm-project/commit/b40fde062c306584d88243de11175187e754ce3b
DIFF: https://github.com/llvm/llvm-project/commit/b40fde062c306584d88243de11175187e754ce3b.diff

LOG: [InstCombine] fold fdiv with pow divisor (PR49147)

This fold (Z / pow(X, Y) --> Z * pow(X, -Y)) is unusual in the general
(non-reciprocal) case because it needs an extra instruction, but the
result should be better for general FP reassociation and codegen. We
conservatively check for "arcp" FMF here, as we do with the existing
fdiv folds, but that flag is not strictly necessary.

This is part of solving:
https://llvm.org/PR49147
(The powi variant potentially has a different constraint.)

Differential Revision: https://reviews.llvm.org/D96648

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
    llvm/test/Transforms/InstCombine/fdiv.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 5526739a016f..9bc566ed3523 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1325,6 +1325,17 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
     // replaced by a multiplication.
     if (match(Op1, m_FDiv(m_SpecificFP(1.0), m_Value(Y))))
       return BinaryOperator::CreateFMulFMF(Y, Op0, &I);
+
+    // Negate the exponent of pow to fold division-by-pow() into multiply:
+    // Z / pow(X, Y) --> Z * pow(X, -Y)
+    // In the general case, this creates an extra instruction, but fmul allows
+    // for better canonicalization and optimization than fdiv.
+    if (match(Op1,
+              m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))))) {
+      Value *NegY = Builder.CreateFNegFMF(Y, &I);
+      Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, NegY, &I);
+      return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
+    }
   }
 
   if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) {

diff  --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index f1858cb7bb02..52bf746dd369 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -661,8 +661,9 @@ define float @fabs_fabs_extra_use3(float %x, float %y) {
 
 define float @pow_divisor(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor(
-; CHECK-NEXT:    [[P:%.*]] = call float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc arcp float [[Z:%.*]], [[P]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fneg reassoc arcp float [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc arcp float @llvm.pow.f32(float [[X:%.*]], float [[TMP1]])
+; CHECK-NEXT:    [[R:%.*]] = fmul reassoc arcp float [[TMP2]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %p = call float @llvm.pow.f32(float %x, float %y)
@@ -670,6 +671,8 @@ define float @pow_divisor(float %x, float %y, float %z) {
   ret float %r
 }
 
+; Negative test - don't create an extra pow
+
 define float @pow_divisor_extra_use(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor_extra_use(
 ; CHECK-NEXT:    [[P:%.*]] = call float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -683,6 +686,8 @@ define float @pow_divisor_extra_use(float %x, float %y, float %z) {
   ret float %r
 }
 
+; Negative test - must have reassoc+arcp
+
 define float @pow_divisor_not_enough_fmf(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor_not_enough_fmf(
 ; CHECK-NEXT:    [[P:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -694,6 +699,8 @@ define float @pow_divisor_not_enough_fmf(float %x, float %y, float %z) {
   ret float %r
 }
 
+; Negative test - must have reassoc+arcp
+
 define float @pow_divisor_not_enough_fmf2(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor_not_enough_fmf2(
 ; CHECK-NEXT:    [[P:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -705,11 +712,13 @@ define float @pow_divisor_not_enough_fmf2(float %x, float %y, float %z) {
   ret float %r
 }
 
+; Special-case - reciprocal does not require extra fmul
+
 define <2 x half> @pow_recip(<2 x half> %x, <2 x half> %y) {
 ; CHECK-LABEL: @pow_recip(
-; CHECK-NEXT:    [[P:%.*]] = call <2 x half> @llvm.pow.v2f16(<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc ninf arcp <2 x half> <half 0xH3C00, half 0xH3C00>, [[P]]
-; CHECK-NEXT:    ret <2 x half> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fneg reassoc ninf arcp <2 x half> [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc ninf arcp <2 x half> @llvm.pow.v2f16(<2 x half> [[X:%.*]], <2 x half> [[TMP1]])
+; CHECK-NEXT:    ret <2 x half> [[TMP2]]
 ;
   %p = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
   %r = fdiv reassoc arcp ninf <2 x half> <half 1.0, half 1.0>, %p