[llvm] f61f99a - [instcombine] Optimise for zero initialisation of product given fast flags are enabled
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 17 03:12:57 PDT 2022
Author: Zain Jaffal
Date: 2022-08-17T11:12:15+01:00
New Revision: f61f99a105914c7060baf4161ffacc96a0995764
URL: https://github.com/llvm/llvm-project/commit/f61f99a105914c7060baf4161ffacc96a0995764
DIFF: https://github.com/llvm/llvm-project/commit/f61f99a105914c7060baf4161ffacc96a0995764.diff
LOG: [instcombine] Optimise for zero initialisation of product given fast flags are enabled
Currently, clang does not take advantage of a zero initial value for a floating-point product when compiling in finite-math mode.
For example:
```
double f_prod = 0;
double arr[1000];
for (size_t i = 0; i < 1000; i++) {
f_prod *= arr[i];
}
```
Clang ignores the fact that `f_prod` is initialised to zero, so it still generates assembly that iterates over the whole loop.
Reviewed By: fhahn, spatel
Differential Revision: https://reviews.llvm.org/D131672
Added:
Modified:
llvm/lib/Analysis/ValueTracking.cpp
llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index c4f678171d8a8..b2393de81fc4e 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6538,7 +6538,8 @@ bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
case Instruction::Sub:
case Instruction::And:
case Instruction::Or:
- case Instruction::Mul: {
+ case Instruction::Mul:
+ case Instruction::FMul: {
Value *LL = LU->getOperand(0);
Value *LR = LU->getOperand(1);
// Find a recurrence.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 1737009705048..5ccd91d6aa24c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -672,6 +673,15 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
}
}
+ // Simplify FMUL recurrences starting with 0.0 to 0.0 if nnan and nsz are set.
+ // Given a phi node with entry value as 0 and it used in fmul operation,
+ // we can replace fmul with 0 safely and eleminate loop operation.
+ PHINode *PN = nullptr;
+ Value *Start = nullptr, *Step = nullptr;
+ if (matchSimpleRecurrence(&I, PN, Start, Step) && I.hasNoNaNs() &&
+ I.hasNoSignedZeros() && match(Start, m_Zero()))
+ return replaceInstUsesWith(I, Start);
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll b/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll
index 7ced747345489..57489317b50b3 100644
--- a/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll
+++ b/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll
@@ -6,15 +6,11 @@ define double @test_mul_fast_flags(ptr %arr_d) {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
-; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT: [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]]
; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
; CHECK: end:
-; CHECK-NEXT: ret double [[MUL]]
+; CHECK-NEXT: ret double 0.000000e+00
;
entry:
br label %for.body
@@ -40,15 +36,11 @@ define double @test_nsz_nnan_flags_enabled(ptr %arr_d) {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
-; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT: [[MUL]] = fmul nnan nsz double [[F_PROD_01]], [[TMP0]]
; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
; CHECK: end:
-; CHECK-NEXT: ret double [[MUL]]
+; CHECK-NEXT: ret double 0.000000e+00
;
entry:
br label %for.body
More information about the llvm-commits
mailing list