[llvm] 56ca5ec - [InstCombine] Optimize powi(X, Y) / (X * Z) with Ofast

via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 20 21:12:17 PDT 2024


Author: zhongyunde 00443407
Date: 2024-04-21T12:09:20+08:00
New Revision: 56ca5ecf416ad0e57c5e3558159bd73e5d662476

URL: https://github.com/llvm/llvm-project/commit/56ca5ecf416ad0e57c5e3558159bd73e5d662476
DIFF: https://github.com/llvm/llvm-project/commit/56ca5ecf416ad0e57c5e3558159bd73e5d662476.diff

LOG: [InstCombine] Optimize powi(X, Y) / (X * Z) with Ofast

foldFDivPowDivisor can rewrite A / powi(x, y) as A * powi(x, -y). However,
for a small constant y, e.g. y = 2, InstCombine transforms powi(x, 2) into
fmul x, x, so that fold never sees the powi divisor and A / powi(x, 2) is
left unoptimized.

Fix https://github.com/llvm/llvm-project/issues/77171
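
As an illustration (a minimal sketch based on the added fdiv_fmul_powi_2
test, not part of the patch itself; the function name below is made up), the
motivating IR looks like this, where the a * a divisor is what powi(a, 2)
becomes after early expansion:

    ; Before: powi(a, 5) / (a * a)
    define double @powi_div_sketch(double %a) {
      %pow    = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
      %square = fmul reassoc double %a, %a
      %div    = fdiv reassoc nnan double %pow, %square
      ret double %div
    }
    declare double @llvm.powi.f64.i32(double, i32)

The new powi(X, Y) / (X * Z) --> powi(X, Y-1) / Z fold cancels one factor of
a, and the existing powi(X, Y) / X fold then collapses the rest, leaving a
single call:

    ; After instcombine
    define double @powi_div_sketch(double %a) {
      %res = call reassoc nnan double @llvm.powi.f64.i32(double %a, i32 3)
      ret double %res
    }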

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
    llvm/test/Transforms/InstCombine/powi.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 000d33a091970a..4ed4c36e21e016 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -576,9 +576,10 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
                            Value *Y, Value *Z) {
     InstCombiner::BuilderTy &Builder = IC.Builder;
     Value *YZ = Builder.CreateAdd(Y, Z);
-    auto *NewPow = Builder.CreateIntrinsic(
+    Instruction *NewPow = Builder.CreateIntrinsic(
         Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
-    return IC.replaceInstUsesWith(I, NewPow);
+
+    return NewPow;
   };
 
   Value *X, *Y, *Z;
@@ -592,8 +593,10 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
                              m_Value(X), m_Value(Y)))),
                          m_Deferred(X)))) {
     Constant *One = ConstantInt::get(Y->getType(), 1);
-    if (willNotOverflowSignedAdd(Y, One, I))
-      return createPowiExpr(I, *this, X, Y, One);
+    if (willNotOverflowSignedAdd(Y, One, I)) {
+      Instruction *NewPow = createPowiExpr(I, *this, X, Y, One);
+      return replaceInstUsesWith(I, NewPow);
+    }
   }
 
   // powi(x, y) * powi(x, z) -> powi(x, y + z)
@@ -604,19 +607,36 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
                      m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y)))) &&
       match(Op1, m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(m_Specific(X),
                                                              m_Value(Z)))) &&
-      Y->getType() == Z->getType())
-    return createPowiExpr(I, *this, X, Y, Z);
-
-  // powi(X, Y) / X --> powi(X, Y-1)
-  // This is legal when (Y - 1) can't wraparound, in which case reassoc and nnan
-  // are required.
-  // TODO: Multi-use may be also better off creating Powi(x,y-1)
-  if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs() &&
-      match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
-                     m_Specific(Op1), m_Value(Y))))) &&
-      willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
-    Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
-    return createPowiExpr(I, *this, Op1, Y, NegOne);
+      Y->getType() == Z->getType()) {
+    Instruction *NewPow = createPowiExpr(I, *this, X, Y, Z);
+    return replaceInstUsesWith(I, NewPow);
+  }
+
+  if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs()) {
+    // powi(X, Y) / X --> powi(X, Y-1)
+    // This is legal when (Y - 1) can't wraparound, in which case reassoc and
+    // nnan are required.
+    // TODO: Multi-use may be also better off creating Powi(x,y-1)
+    if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+                       m_Specific(Op1), m_Value(Y))))) &&
+        willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+      Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+      Instruction *NewPow = createPowiExpr(I, *this, Op1, Y, NegOne);
+      return replaceInstUsesWith(I, NewPow);
+    }
+
+    // powi(X, Y) / (X * Z) --> powi(X, Y-1) / Z
+    // This is legal when (Y - 1) can't wraparound, in which case reassoc and
+    // nnan are required.
+    // TODO: Multi-use may be also better off creating Powi(x,y-1)
+    if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+                       m_Value(X), m_Value(Y))))) &&
+        match(Op1, m_AllowReassoc(m_c_FMul(m_Specific(X), m_Value(Z)))) &&
+        willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+      Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+      auto *NewPow = createPowiExpr(I, *this, X, Y, NegOne);
+      return BinaryOperator::CreateFDivFMF(NewPow, Z, &I);
+    }
   }
 
   return nullptr;

diff  --git a/llvm/test/Transforms/InstCombine/powi.ll b/llvm/test/Transforms/InstCombine/powi.ll
index 6c0575e8b71971..d76f92c1849af9 100644
--- a/llvm/test/Transforms/InstCombine/powi.ll
+++ b/llvm/test/Transforms/InstCombine/powi.ll
@@ -401,6 +401,121 @@ define double @fdiv_pow_powi_negative_variable(double %x, i32 %y) {
   ret double %div
 }
 
+; powi(X,C1)/ (X * Z) --> powi(X,C1 - 1)/ Z
+define double @fdiv_fmul_powi(double %a, double %z) {
+; CHECK-LABEL: @fdiv_fmul_powi(
+; CHECK-NEXT:    [[TMP1:%.*]] = call reassoc nnan double @llvm.powi.f64.i32(double [[A:%.*]], i32 4)
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc nnan double [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %z, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
+; powi(X, 5)/ (X * X) --> powi(X, 4)/ X -> powi(X, 3)
+define double @fdiv_fmul_powi_2(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_2(
+; CHECK-NEXT:    [[DIV:%.*]] = call reassoc nnan double @llvm.powi.f64.i32(double [[A:%.*]], i32 3)
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
+define <2 x float> @fdiv_fmul_powi_vector(<2 x float> %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_vector(
+; CHECK-NEXT:    [[DIV:%.*]] = call reassoc nnan <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[A:%.*]], i32 3)
+; CHECK-NEXT:    ret <2 x float> [[DIV]]
+;
+  %pow = call reassoc <2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 5)
+  %square = fmul reassoc <2 x float> %a, %a
+  %div = fdiv reassoc nnan <2 x float> %pow, %square
+  ret <2 x float> %div
+}
+
+; Negative test
+define double @fdiv_fmul_powi_missing_reassoc1(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_reassoc1(
+; CHECK-NEXT:    [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv nnan double %pow, %square
+  ret double %div
+}
+
+define double @fdiv_fmul_powi_missing_reassoc2(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_reassoc2(
+; CHECK-NEXT:    [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul double [[A]], [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul double %a, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
+define double @fdiv_fmul_powi_missing_reassoc3(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_reassoc3(
+; CHECK-NEXT:    [[POW:%.*]] = call double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
+define double @fdiv_fmul_powi_missing_nnan(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_nnan(
+; CHECK-NEXT:    [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc double [[POW]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv reassoc double %pow, %square
+  ret double %div
+}
+
+define double @fdiv_fmul_powi_negative_wrap(double noundef %x) {
+; CHECK-LABEL: @fdiv_fmul_powi_negative_wrap(
+; CHECK-NEXT:    [[P1:%.*]] = tail call double @llvm.powi.f64.i32(double [[X:%.*]], i32 -2147483648)
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[P1]], [[X]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 -2147483648) ; INT_MIN
+  %mul = fmul reassoc double %p1, %x
+  ret double %mul
+}
+
+define double @fdiv_fmul_powi_multi_use(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_multi_use(
+; CHECK-NEXT:    [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT:    tail call void @use(double [[POW]])
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[DIV]]
+;
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  tail call void @use(double %pow)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
 ; powi(X, Y) * X --> powi(X, Y+1)
 define double @powi_fmul_powi_x(double noundef %x) {
 ; CHECK-LABEL: @powi_fmul_powi_x(


        

