[llvm] [InstCombine] Optimize powi(X, Y)/ (X * Z) with Ofast (PR #87047)

Sat Apr 20 21:11:05 PDT 2024

https://github.com/vfdff updated https://github.com/llvm/llvm-project/pull/87047

>From cb7cb83010bbcd8e5325d81b6d80653c7b513516 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Thu, 28 Mar 2024 21:39:30 -0400
Subject: [PATCH 1/2] [InstCombine] Add check to avoid dependent optimization
 order, NFC

Since PR86428, foldPowiReassoc is called for both FMul and FDiv.
Because the FDiv fold is placed after the FMul folds, the code is
currently correct even without an explicit FDiv check for powi(X, Y) / X.
However, more matching scenarios may be added later, so checking the
opcode explicitly makes the code easier to understand.
---
 llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index a0333b7db8f7a9..000d33a091970a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -582,6 +582,9 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
   };
 
   Value *X, *Y, *Z;
+  unsigned Opcode = I.getOpcode();
+  assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
+         "Unexpected opcode");
 
   // powi(X, Y) * X --> powi(X, Y+1)
   // X * powi(X, Y) --> powi(X, Y+1)
@@ -596,7 +599,7 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
   // powi(x, y) * powi(x, z) -> powi(x, y + z)
   Value *Op0 = I.getOperand(0);
   Value *Op1 = I.getOperand(1);
-  if (I.isOnlyUserOfAnyOperand() &&
+  if (Opcode == Instruction::FMul && I.isOnlyUserOfAnyOperand() &&
       match(Op0, m_AllowReassoc(
                      m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y)))) &&
       match(Op1, m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(m_Specific(X),
@@ -608,7 +611,7 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
   // This is legal when (Y - 1) can't wraparound, in which case reassoc and nnan
   // are required.
   // TODO: Multi-use may be also better off creating Powi(x,y-1)
-  if (I.hasAllowReassoc() && I.hasNoNaNs() &&
+  if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs() &&
       match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
                      m_Specific(Op1), m_Value(Y))))) &&
       willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {

>From 56ca5ecf416ad0e57c5e3558159bd73e5d662476 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Thu, 28 Mar 2024 22:18:37 -0400
Subject: [PATCH 2/2] [InstCombine] Optimize powi(X, Y)/ (X * Z) with Ofast

foldFDivPowDivisor can transform A / powi(x, y) into A * powi(x, -y).
However, for a small constant y, for example y = 2, InstCombine transforms
powi(x, 2) into fmul x, x, so the result is not optimal for A / powi(x, 2).

Fix https://github.com/llvm/llvm-project/issues/77171
---
 .../InstCombine/InstCombineMulDivRem.cpp      |  54 +++++---
 llvm/test/Transforms/InstCombine/powi.ll      | 115 ++++++++++++++++++
 2 files changed, 152 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 000d33a091970a..4ed4c36e21e016 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -576,9 +576,10 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
                            Value *Y, Value *Z) {
     InstCombiner::BuilderTy &Builder = IC.Builder;
     Value *YZ = Builder.CreateAdd(Y, Z);
-    auto *NewPow = Builder.CreateIntrinsic(
+    Instruction *NewPow = Builder.CreateIntrinsic(
         Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
-    return IC.replaceInstUsesWith(I, NewPow);
+
+    return NewPow;
   };
 
   Value *X, *Y, *Z;
@@ -592,8 +593,10 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
                              m_Value(X), m_Value(Y)))),
                          m_Deferred(X)))) {
     Constant *One = ConstantInt::get(Y->getType(), 1);
-    if (willNotOverflowSignedAdd(Y, One, I))
-      return createPowiExpr(I, *this, X, Y, One);
+    if (willNotOverflowSignedAdd(Y, One, I)) {
+      Instruction *NewPow = createPowiExpr(I, *this, X, Y, One);
+      return replaceInstUsesWith(I, NewPow);
+    }
   }
 
   // powi(x, y) * powi(x, z) -> powi(x, y + z)
@@ -604,19 +607,36 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
                      m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y)))) &&
       match(Op1, m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(m_Specific(X),
                                                              m_Value(Z)))) &&
-      Y->getType() == Z->getType())
-    return createPowiExpr(I, *this, X, Y, Z);
-
-  // powi(X, Y) / X --> powi(X, Y-1)
-  // This is legal when (Y - 1) can't wraparound, in which case reassoc and nnan
-  // are required.
-  // TODO: Multi-use may be also better off creating Powi(x,y-1)
-  if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs() &&
-      match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
-                     m_Specific(Op1), m_Value(Y))))) &&
-      willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
-    Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
-    return createPowiExpr(I, *this, Op1, Y, NegOne);
+      Y->getType() == Z->getType()) {
+    Instruction *NewPow = createPowiExpr(I, *this, X, Y, Z);
+    return replaceInstUsesWith(I, NewPow);
+  }
+
+  if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs()) {
+    // powi(X, Y) / X --> powi(X, Y-1)
+    // This is legal when (Y - 1) can't wraparound, in which case reassoc and
+    // nnan are required.
+    // TODO: Multi-use may be also better off creating Powi(x,y-1)
+    if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+                       m_Specific(Op1), m_Value(Y))))) &&
+        willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+      Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+      Instruction *NewPow = createPowiExpr(I, *this, Op1, Y, NegOne);
+      return replaceInstUsesWith(I, NewPow);
+    }
+
+    // powi(X, Y) / (X * Z) --> powi(X, Y-1) / Z
+    // This is legal when (Y - 1) can't wraparound, in which case reassoc and
+    // nnan are required.
+    // TODO: Multi-use may be also better off creating Powi(x,y-1)
+    if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+                       m_Value(X), m_Value(Y))))) &&
+        match(Op1, m_AllowReassoc(m_c_FMul(m_Specific(X), m_Value(Z)))) &&
+        willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+      Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+      auto *NewPow = createPowiExpr(I, *this, X, Y, NegOne);
+      return BinaryOperator::CreateFDivFMF(NewPow, Z, &I);
+    }
   }
 
   return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/powi.ll b/llvm/test/Transforms/InstCombine/powi.ll
index 6c0575e8b71971..d76f92c1849af9 100644
--- a/llvm/test/Transforms/InstCombine/powi.ll
+++ b/llvm/test/Transforms/InstCombine/powi.ll
@@ -401,6 +401,121 @@ define double @fdiv_pow_powi_negative_variable(double %x, i32 %y) {
   ret double %div
 }
 
+; powi(X,C1)/ (X * Z) --> powi(X,C1 - 1)/ Z
+define double @fdiv_fmul_powi(double %a, double %z) {
+; CHECK-LABEL: @fdiv_fmul_powi(
+; CHECK-NEXT:    [[TMP1:%.*]] = call reassoc nnan double @llvm.powi.f64.i32(double [[A:%.*]], i32 4)
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc nnan double [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %z, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
+; powi(X, 5) / (X * X) --> powi(X, 4) / X --> powi(X, 3)
+define double @fdiv_fmul_powi_2(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_2(
+; CHECK-NEXT:    [[DIV:%.*]] = call reassoc nnan double @llvm.powi.f64.i32(double [[A:%.*]], i32 3)
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
+define <2 x float> @fdiv_fmul_powi_vector(<2 x float> %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_vector(
+; CHECK-NEXT:    [[DIV:%.*]] = call reassoc nnan <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[A:%.*]], i32 3)
+; CHECK-NEXT:    ret <2 x float> [[DIV]]
+;
+  %pow = call reassoc <2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 5)
+  %square = fmul reassoc <2 x float> %a, %a
+  %div = fdiv reassoc nnan <2 x float> %pow, %square
+  ret <2 x float> %div
+}
+
+; Negative test
+define double @fdiv_fmul_powi_missing_reassoc1(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_reassoc1(
+; CHECK-NEXT:    [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv nnan double %pow, %square
+  ret double %div
+}
+
+define double @fdiv_fmul_powi_missing_reassoc2(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_reassoc2(
+; CHECK-NEXT:    [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul double [[A]], [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul double %a, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
+define double @fdiv_fmul_powi_missing_reassoc3(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_reassoc3(
+; CHECK-NEXT:    [[POW:%.*]] = call double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
+define double @fdiv_fmul_powi_missing_nnan(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_nnan(
+; CHECK-NEXT:    [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc double [[POW]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv reassoc double %pow, %square
+  ret double %div
+}
+
+define double @fdiv_fmul_powi_negative_wrap(double noundef %x) {
+; CHECK-LABEL: @fdiv_fmul_powi_negative_wrap(
+; CHECK-NEXT:    [[P1:%.*]] = tail call double @llvm.powi.f64.i32(double [[X:%.*]], i32 -2147483648)
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[P1]], [[X]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 -2147483648) ; INT_MIN
+  %mul = fmul reassoc double %p1, %x
+  ret double %mul
+}
+
+define double @fdiv_fmul_powi_multi_use(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_multi_use(
+; CHECK-NEXT:    [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT:    tail call void @use(double [[POW]])
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  tail call void @use(double %pow)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
 ; powi(X, Y) * X --> powi(X, Y+1)
 define double @powi_fmul_powi_x(double noundef %x) {
 ; CHECK-LABEL: @powi_fmul_powi_x(