[llvm] [flang] Stack array visit (PR #70816)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 31 08:06:57 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Dmitriy Smirnov (d-smirnov)
<details>
<summary>Changes</summary>
This PR fixes a compile-time performance degradation observed on 521.wrf_r with -Ofast.
---
Full diff: https://github.com/llvm/llvm-project/pull/70816.diff
3 Files Affected:
- (modified) flang/lib/Optimizer/Transforms/StackArrays.cpp (+6)
- (added) flang/test/Transforms/if.fir (+39)
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+3-3)
``````````diff
diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index 9b90aed5a17ae73..41e5dafd04e71bb 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -154,6 +154,9 @@ class AllocationAnalysis
/// Visit control flow operations and decide whether to call visitOperation
/// to apply the transfer function
void processOperation(mlir::Operation *op) override;
+
+private:
+ llvm::DenseSet<mlir::Operation *> visited;
};
/// Drives analysis to find candidate fir.allocmem operations which could be
@@ -326,6 +329,9 @@ std::optional<AllocationState> LatticePoint::get(mlir::Value val) const {
void AllocationAnalysis::visitOperation(mlir::Operation *op,
const LatticePoint &before,
LatticePoint *after) {
+ if (!visited.insert(op).second)
+ return;
+
LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op
<< "\n");
LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");
diff --git a/flang/test/Transforms/if.fir b/flang/test/Transforms/if.fir
new file mode 100644
index 000000000000000..abddd682986ea57
--- /dev/null
+++ b/flang/test/Transforms/if.fir
@@ -0,0 +1,39 @@
+// RUN: fir-opt --stack-arrays --debug-only=stack-arrays %s 2>&1 | grep -v '\-\-' | FileCheck %s
+
+// Check the data-flow-analysis can detect cases where we aren't sure if memory
+// is freed by the end of the function
+func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
+ %7 = arith.constant 42 : index
+ %8 = fir.allocmem !fir.array<?xi32>, %7 {uniq_name = "_QFdfa1Earr.alloc"}
+ %9 = fir.load %arg0 : !fir.ref<!fir.logical<4>>
+ %10 = fir.convert %9 : (!fir.logical<4>) -> i1
+ fir.if %10 {
+ fir.freemem %8 : !fir.heap<!fir.array<?xi32>>
+ } else {
+ }
+ return
+}
+
+// 8 visits:
+// CHECK: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+///CHECK-NEXT: module {
+// CHECK-NEXT: func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
+// CHECK-NEXT: %[[C42:.*]] = arith.constant 42 : index
+// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[C42]] {uniq_name = "_QFdfa1Earr.alloc"}
+// CHECK-NEXT: %[[LOGICAL:.*]] = fir.load %arg0 : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT: %[[BOOL:.*]] = fir.convert %[[LOGICAL]] : (!fir.logical<4>) -> i1
+// CHECK-NEXT: fir.if %[[BOOL]] {
+// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT: } else {
+// CHECK-NEXT: }
+// CHECK-NEXT: return
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b6895c649f838c1..1cc6248caa76e44 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -138,8 +138,8 @@ MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
static cl::opt<unsigned>
-MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
- cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
+ MaxVFOption("slp-max-vf", cl::init(192), cl::Hidden,
+ cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
/// Limits the size of scheduling regions in a block.
/// It avoid long compile times for _very_ large blocks where vector
@@ -4135,7 +4135,7 @@ static bool areTwoInsertFromSameBuildVector(
// Go through the vector operand of insertelement instructions trying to find
// either VU as the original vector for IE2 or V as the original vector for
// IE1.
- SmallSet<int, 8> ReusedIdx;
+ SmallDenseSet<int, 8> ReusedIdx;
bool IsReusedIdx = false;
do {
if (IE2 == VU && !IE1)
``````````
</details>
https://github.com/llvm/llvm-project/pull/70816
More information about the llvm-commits mailing list