[llvm] [InstCombine] Optimize powi(X, Y)/ (X * Z) with Ofast (PR #87047)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 29 05:06:28 PDT 2024
https://github.com/vfdff updated https://github.com/llvm/llvm-project/pull/87047
>From 3a9ba822021eecd9d21be3e6dfc327478e61e696 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Thu, 28 Mar 2024 21:39:30 -0400
Subject: [PATCH 1/2] [InstCombine] Add check to avoid dependent optimization
order, NFC
Since PR86428, foldPowiReassoc is called for both FMul and FDiv.
Because the FDiv fold is placed after the FMul folds, the code is
currently correct even without an explicit FDiv check for powi(X, Y) / X.
However, we may add more matching scenarios later, so checking the opcode
explicitly makes the code easier to understand.
llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 8c698e52b5a0e6..bb405887f29762 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -589,6 +589,9 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
Value *X, *Y, *Z;
+ unsigned Opcode = I.getOpcode();
+ assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
+ "Unexpected opcode");
// powi(X, Y) * X --> powi(X, Y+1)
// X * powi(X, Y) --> powi(X, Y+1)
@@ -603,7 +606,7 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
// powi(x, y) * powi(x, z) -> powi(x, y + z)
Value *Op0 = I.getOperand(0);
Value *Op1 = I.getOperand(1);
- if (I.isOnlyUserOfAnyOperand() &&
+ if (Opcode == Instruction::FMul && I.isOnlyUserOfAnyOperand() &&
match(Op0, m_AllowReassoc(
m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y)))) &&
match(Op1, m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(m_Specific(X),
@@ -615,7 +618,7 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
// This is legal when (Y - 1) can't wraparound, in which case reassoc and nnan
// are required.
// TODO: Multi-use may be also better off creating Powi(x,y-1)
- if (I.hasAllowReassoc() && I.hasNoNaNs() &&
+ if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs() &&
match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
m_Specific(Op1), m_Value(Y))))) &&
willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
>From d05ae387ba075390ae67f08a418b5678eb1e6c95 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Thu, 28 Mar 2024 22:18:37 -0400
Subject: [PATCH 2/2] [InstCombine] Optimize powi(X, Y)/ (X * Z) with Ofast
foldFDivPowDivisor can transform A / powi(x, y) into A * powi(x, -y).
However, for a small constant y, for example y=2, instcombine will
transform powi(x, 2) into fmul x, x, so the result is not optimal for
A / powi(x, 2).
Fix https://github.com/llvm/llvm-project/issues/77171
.../InstCombine/InstCombineMulDivRem.cpp | 43 +++++--
llvm/test/Transforms/InstCombine/powi.ll | 115 ++++++++++++++++++
2 files changed, 145 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index bb405887f29762..c48d91252c9782 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -580,12 +580,15 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {
Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
auto createPowiExpr = [](BinaryOperator &I, InstCombinerImpl &IC, Value *X,
- Value *Y, Value *Z) {
+ Value *Y, Value *Z, bool UpdateUsers = true) {
InstCombiner::BuilderTy &Builder = IC.Builder;
Value *YZ = Builder.CreateAdd(Y, Z);
- auto *NewPow = Builder.CreateIntrinsic(
+ Instruction *NewPow = Builder.CreateIntrinsic(
Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
- return IC.replaceInstUsesWith(I, NewPow);
+ if (UpdateUsers)
+ return IC.replaceInstUsesWith(I, NewPow);
+ return NewPow;
Value *X, *Y, *Z;
@@ -614,16 +617,30 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
Y->getType() == Z->getType())
return createPowiExpr(I, *this, X, Y, Z);
- // powi(X, Y) / X --> powi(X, Y-1)
- // This is legal when (Y - 1) can't wraparound, in which case reassoc and nnan
- // are required.
- // TODO: Multi-use may be also better off creating Powi(x,y-1)
- if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs() &&
- match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
- m_Specific(Op1), m_Value(Y))))) &&
- willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
- Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
- return createPowiExpr(I, *this, Op1, Y, NegOne);
+ if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs()) {
+ // powi(X, Y) / X --> powi(X, Y-1)
+ // This is legal when (Y - 1) can't wraparound, in which case reassoc and
+ // nnan are required.
+ // TODO: Multi-use may be also better off creating Powi(x,y-1)
+ if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+ m_Specific(Op1), m_Value(Y))))) &&
+ willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+ Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+ return createPowiExpr(I, *this, Op1, Y, NegOne);
+ }
+ // powi(X, Y) / (X * Z) --> powi(X, Y-1) / Z
+ // This is legal when (Y - 1) can't wraparound, in which case reassoc and
+ // nnan are required.
+ // TODO: Multi-use may be also better off creating Powi(x,y-1)
+ if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+ m_Value(X), m_Value(Y))))) &&
+ match(Op1, m_AllowReassoc(m_c_FMul(m_Specific(X), m_Value(Z)))) &&
+ willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+ Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+ auto *NewPow = createPowiExpr(I, *this, X, Y, NegOne, false);
+ return BinaryOperator::CreateFDivFMF(NewPow, Z, &I);
+ }
return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/powi.ll b/llvm/test/Transforms/InstCombine/powi.ll
index 6c0575e8b71971..6d764dc711180c 100644
--- a/llvm/test/Transforms/InstCombine/powi.ll
+++ b/llvm/test/Transforms/InstCombine/powi.ll
@@ -401,6 +401,121 @@ define double @fdiv_pow_powi_negative_variable(double %x, i32 %y) {
ret double %div
+; powi(X,C1)/ (X * Z) --> powi(X,C1 - 1)/ Z
+define double @fdiv_fmul_powi(double %a, double %z) {
+; CHECK-LABEL: @fdiv_fmul_powi(
+; CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan double @llvm.powi.f64.i32(double [[A:%.*]], i32 4)
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc nnan double [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret double [[DIV]]
+ %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+ %square = fmul reassoc double %z, %a
+ %div = fdiv reassoc nnan double %pow, %square
+ ret double %div
+; powi(X, 5)/ (X * X) --> powi(X, 4)/ X -> powi(X, 3)
+define double @fdiv_fmul_powi_2(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_2(
+; CHECK-NEXT: [[DIV:%.*]] = call reassoc nnan double @llvm.powi.f64.i32(double [[A:%.*]], i32 3)
+; CHECK-NEXT: ret double [[DIV]]
+ %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+ %square = fmul reassoc double %a, %a
+ %div = fdiv reassoc nnan double %pow, %square
+ ret double %div
+define <2 x float> @fdiv_fmul_powi_vector(<2 x float> %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_vector(
+; CHECK-NEXT: [[DIV:%.*]] = call reassoc nnan <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[A:%.*]], i32 3)
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+ %pow = call reassoc <2 x float> @llvm.powi.v2f16.i32(<2 x float> %a, i32 5)
+ %square = fmul reassoc <2 x float> %a, %a
+ %div = fdiv reassoc nnan <2 x float> %pow, %square
+ ret <2 x float> %div
+; Negative test
+define double @fdiv_fmul_powi_missing_reassoc1(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_reassoc1(
+; CHECK-NEXT: [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT: ret double [[DIV]]
+ %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+ %square = fmul reassoc double %a, %a
+ %div = fdiv nnan double %pow, %square
+ ret double %div
+define double @fdiv_fmul_powi_missing_reassoc2(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_reassoc2(
+; CHECK-NEXT: [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul double [[A]], [[A]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT: ret double [[DIV]]
+ %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+ %square = fmul double %a, %a
+ %div = fdiv reassoc nnan double %pow, %square
+ ret double %div
+define double @fdiv_fmul_powi_missing_reassoc3(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_reassoc3(
+; CHECK-NEXT: [[POW:%.*]] = call double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT: ret double [[DIV]]
+ %pow = call double @llvm.powi.f64.i32(double %a, i32 5)
+ %square = fmul reassoc double %a, %a
+ %div = fdiv reassoc nnan double %pow, %square
+ ret double %div
+define double @fdiv_fmul_powi_missing_nnan(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_missing_nnan(
+; CHECK-NEXT: [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc double [[POW]], [[SQUARE]]
+; CHECK-NEXT: ret double [[DIV]]
+ %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+ %square = fmul reassoc double %a, %a
+ %div = fdiv reassoc double %pow, %square
+ ret double %div
+define double @fdiv_fmul_powi_negative_wrap(double noundef %x) {
+; CHECK-LABEL: @fdiv_fmul_powi_negative_wrap(
+; CHECK-NEXT: [[P1:%.*]] = tail call double @llvm.powi.f64.i32(double [[X:%.*]], i32 -2147483648)
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[P1]], [[X]]
+; CHECK-NEXT: ret double [[MUL]]
+ %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 -2147483648) ; INT_MIN
+ %mul = fmul reassoc double %p1, %x
+ ret double %mul
+define double @fdiv_fmul_powi_multi_use(double %a) {
+; CHECK-LABEL: @fdiv_fmul_powi_multi_use(
+; CHECK-NEXT: [[POW:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[A:%.*]], i32 5)
+; CHECK-NEXT: tail call void @use(double [[POW]])
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul reassoc double [[A]], [[A]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc nnan double [[POW]], [[SQUARE]]
+; CHECK-NEXT: ret double [[DIV]]
+ %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+ tail call void @use(double %pow)
+ %square = fmul reassoc double %a, %a
+ %div = fdiv reassoc nnan double %pow, %square
+ ret double %div
; powi(X, Y) * X --> powi(X, Y+1)
define double @powi_fmul_powi_x(double noundef %x) {
; CHECK-LABEL: @powi_fmul_powi_x(
More information about the llvm-commits
mailing list