[PATCH] D96648: [InstCombine] fold fdiv with pow divisor (PR49147)

Sat Feb 13 08:17:31 PST 2021

spatel created this revision.
spatel added reviewers: RKSimon, lebedev.ri, cameron.mcinally.
Herald added subscribers: hiraditya, mcrosier.
spatel requested review of this revision.
Herald added a project: LLVM.

This transform is unusual because, in the general (non-reciprocal) case, it needs an extra instruction (an fneg of the exponent) to replace the fdiv with an fmul, but the fmul form should be better for general FP reassociation and codegen. We conservatively check for the "arcp" FMF here, as we do with the existing fdiv folds, but that flag is not strictly necessary for this fold.

This is part of solving:
https://llvm.org/PR49147
(The powi variant potentially has a different constraint, so it is not handled by this patch.)


https://reviews.llvm.org/D96648

Files:
  llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
  llvm/test/Transforms/InstCombine/fdiv.ll


Index: llvm/test/Transforms/InstCombine/fdiv.ll
===================================================================

--- llvm/test/Transforms/InstCombine/fdiv.ll
+++ llvm/test/Transforms/InstCombine/fdiv.ll
@@ -661,8 +661,9 @@
 
 define float @pow_divisor(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor(
-; CHECK-NEXT:    [[P:%.*]] = call float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc arcp float [[Z:%.*]], [[P]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fneg reassoc arcp float [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc arcp float @llvm.pow.f32(float [[X:%.*]], float [[TMP1]])
+; CHECK-NEXT:    [[R:%.*]] = fmul reassoc arcp float [[TMP2]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %p = call float @llvm.pow.f32(float %x, float %y)
@@ -670,6 +671,8 @@
   ret float %r
 }
 
+; Negative test - don't create an extra pow
+
 define float @pow_divisor_extra_use(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor_extra_use(
 ; CHECK-NEXT:    [[P:%.*]] = call float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -683,6 +686,8 @@
   ret float %r
 }
 
+; Negative test - must have reassoc+arcp
+
 define float @pow_divisor_not_enough_fmf(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor_not_enough_fmf(
 ; CHECK-NEXT:    [[P:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -694,11 +699,13 @@
   ret float %r
 }
 
+; Special-case - reciprocal does not require extra fmul
+
 define <2 x half> @pow_recip(<2 x half> %x, <2 x half> %y) {
 ; CHECK-LABEL: @pow_recip(
-; CHECK-NEXT:    [[P:%.*]] = call <2 x half> @llvm.pow.v2f16(<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc ninf arcp <2 x half> <half 0xH3C00, half 0xH3C00>, [[P]]
-; CHECK-NEXT:    ret <2 x half> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fneg reassoc ninf arcp <2 x half> [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc ninf arcp <2 x half> @llvm.pow.v2f16(<2 x half> [[X:%.*]], <2 x half> [[TMP1]])
+; CHECK-NEXT:    ret <2 x half> [[TMP2]]
 ;
   %p = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
   %r = fdiv reassoc arcp ninf <2 x half> <half 1.0, half 1.0>, %p
Index: llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1325,6 +1325,17 @@
     // replaced by a multiplication.
     if (match(Op1, m_FDiv(m_SpecificFP(1.0), m_Value(Y))))
       return BinaryOperator::CreateFMulFMF(Y, Op0, &I);
+
+    // Negate the exponent of pow to fold division-by-pow() into multiply:
+    // Z / pow(X, Y) --> Z * pow(X, -Y)
+    // In the general case, this creates an extra instruction, but fmul allows
+    // for better canonicalization and optimization than fdiv.
+    if (match(Op1,
+              m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))))) {
+      Value *NegY = Builder.CreateFNegFMF(Y, &I);
+      Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, NegY, &I);
+      return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
+    }
   }
 
   if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D96648.323558.patch
Type: text/x-patch
Size: 3288 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210213/a04739f9/attachment.bin>