[llvm] c9e8c9e - [InstCombine] fold fmul/fdiv with fabs operands
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 25 08:36:37 PDT 2020
Author: Sanjay Patel
Date: 2020-06-25T11:35:38-04:00
New Revision: c9e8c9e3ea2681dc18dd3a2d43d6aa8f37252649
URL: https://github.com/llvm/llvm-project/commit/c9e8c9e3ea2681dc18dd3a2d43d6aa8f37252649
DIFF: https://github.com/llvm/llvm-project/commit/c9e8c9e3ea2681dc18dd3a2d43d6aa8f37252649.diff
LOG: [InstCombine] fold fmul/fdiv with fabs operands
fabs(X) * fabs(Y) --> fabs(X * Y)
fabs(X) / fabs(Y) --> fabs(X / Y)
If both operands of fmul/fdiv are positive, then the result must be positive.
There's a NAN corner-case that prevents removing the more specific fold just
above this one:
fabs(X) * fabs(X) -> X * X
That fold works even with NAN because the sign-bit result of the multiply is
not specified if X is NAN.
We can't remove that and use the more general fold that is proposed here
because once we convert to this:
fabs (X * X)
...it is not legal to simplify the 'fabs' out of that expression when X is NAN.
That's because fabs() guarantees that the sign-bit is always cleared - even
for NAN values.
So this patch has the potential to lose information, but it seems unlikely if
we do the more specific fold ahead of this one.
Differential Revision: https://reviews.llvm.org/D82277
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineInternal.h
llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
llvm/test/Transforms/InstCombine/fdiv.ll
llvm/test/Transforms/InstCombine/fmul.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index d256e77f9a59..8caf580a3fd6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -635,6 +635,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
Value *getSelectCondition(Value *A, Value *B);
Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
+ Instruction *foldFPSignBitOps(BinaryOperator &I);
public:
/// Inserts an instruction \p New before instruction \p Old
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cbbde9a1aeb8..2d4fdd3eeb40 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -402,7 +402,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
-static Instruction *foldFPSignBitOps(BinaryOperator &I) {
+Instruction *InstCombiner::foldFPSignBitOps(BinaryOperator &I) {
BinaryOperator::BinaryOps Opcode = I.getOpcode();
assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
"Expected fmul or fdiv");
@@ -420,6 +420,19 @@ static Instruction *foldFPSignBitOps(BinaryOperator &I) {
if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))))
return BinaryOperator::CreateWithCopiedFlags(Opcode, X, X, &I);
+ // fabs(X) * fabs(Y) --> fabs(X * Y)
+ // fabs(X) / fabs(Y) --> fabs(X / Y)
+ if (match(Op0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))) &&
+ match(Op1, m_Intrinsic<Intrinsic::fabs>(m_Value(Y))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(I.getFastMathFlags());
+ Value *XY = Builder.CreateBinOp(Opcode, X, Y);
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, XY);
+ Fabs->takeName(&I);
+ return replaceInstUsesWith(I, Fabs);
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index deceb724007e..7b1a556439b7 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -598,9 +598,8 @@ define float @fabs_same_op_extra_use(float %x) {
define float @fabs_fabs(float %x, float %y) {
; CHECK-LABEL: @fabs_fabs(
-; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
-; CHECK-NEXT: [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fdiv float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = call float @llvm.fabs.f32(float [[TMP1]])
; CHECK-NEXT: ret float [[R]]
;
%x.fabs = call float @llvm.fabs.f32(float %x)
@@ -613,8 +612,8 @@ define float @fabs_fabs_extra_use1(float %x, float %y) {
; CHECK-LABEL: @fabs_fabs_extra_use1(
; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
; CHECK-NEXT: call void @use_f32(float [[X_FABS]])
-; CHECK-NEXT: [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fdiv ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv ninf float [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.fabs.f32(float [[TMP1]])
; CHECK-NEXT: ret float [[R]]
;
%x.fabs = call float @llvm.fabs.f32(float %x)
@@ -626,10 +625,10 @@ define float @fabs_fabs_extra_use1(float %x, float %y) {
define float @fabs_fabs_extra_use2(float %x, float %y) {
; CHECK-LABEL: @fabs_fabs_extra_use2(
-; CHECK-NEXT: [[X_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
; CHECK-NEXT: [[Y_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[Y:%.*]])
; CHECK-NEXT: call void @use_f32(float [[Y_FABS]])
-; CHECK-NEXT: [[R:%.*]] = fdiv reassoc ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc ninf float [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = call reassoc ninf float @llvm.fabs.f32(float [[TMP1]])
; CHECK-NEXT: ret float [[R]]
;
%x.fabs = call fast float @llvm.fabs.f32(float %x)
@@ -639,6 +638,8 @@ define float @fabs_fabs_extra_use2(float %x, float %y) {
ret float %r
}
+; negative test - don't create an extra instruction
+
define float @fabs_fabs_extra_use3(float %x, float %y) {
; CHECK-LABEL: @fabs_fabs_extra_use3(
; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 0559a7907de0..8e168f252978 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -482,9 +482,8 @@ define float @fabs_squared_fast(float %x) {
define float @fabs_fabs(float %x, float %y) {
; CHECK-LABEL: @fabs_fabs(
-; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
-; CHECK-NEXT: [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = call float @llvm.fabs.f32(float [[TMP1]])
; CHECK-NEXT: ret float [[MUL]]
;
%x.fabs = call float @llvm.fabs.f32(float %x)
@@ -497,8 +496,8 @@ define float @fabs_fabs_extra_use1(float %x, float %y) {
; CHECK-LABEL: @fabs_fabs_extra_use1(
; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
; CHECK-NEXT: call void @use_f32(float [[X_FABS]])
-; CHECK-NEXT: [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT: [[MUL:%.*]] = fmul ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul ninf float [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = call ninf float @llvm.fabs.f32(float [[TMP1]])
; CHECK-NEXT: ret float [[MUL]]
;
%x.fabs = call float @llvm.fabs.f32(float %x)
@@ -510,10 +509,10 @@ define float @fabs_fabs_extra_use1(float %x, float %y) {
define float @fabs_fabs_extra_use2(float %x, float %y) {
; CHECK-LABEL: @fabs_fabs_extra_use2(
-; CHECK-NEXT: [[X_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
; CHECK-NEXT: [[Y_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[Y:%.*]])
; CHECK-NEXT: call void @use_f32(float [[Y_FABS]])
-; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc ninf float [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[MUL:%.*]] = call reassoc ninf float @llvm.fabs.f32(float [[TMP1]])
; CHECK-NEXT: ret float [[MUL]]
;
%x.fabs = call fast float @llvm.fabs.f32(float %x)
@@ -523,6 +522,8 @@ define float @fabs_fabs_extra_use2(float %x, float %y) {
ret float %mul
}
+; negative test - don't create an extra instruction
+
define float @fabs_fabs_extra_use3(float %x, float %y) {
; CHECK-LABEL: @fabs_fabs_extra_use3(
; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
More information about the llvm-commits
mailing list