[flang] [llvm] Stack array visit (PR #70816)
Dmitriy Smirnov via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 31 08:05:03 PDT 2023
https://github.com/d-smirnov created https://github.com/llvm/llvm-project/pull/70816
This PR fixes a compile-time performance degradation observed on 521.wrf_r with -Ofast.
>From 8d0ea9365fc51e8cdb93e78da99fa80487b38d03 Mon Sep 17 00:00:00 2001
From: Dmitriy Smirnov <dmitriy.smirnov at arm.com>
Date: Fri, 27 Oct 2023 15:58:46 +0000
Subject: [PATCH 1/2] Changed default value of slp-max-vf to 192
1. Changed default value of slp-max-vf to 192
2. Minor performance fix: SmallSet -> SmallDenseSet
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b6895c649f838c1..1cc6248caa76e44 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -138,8 +138,8 @@ MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
static cl::opt<unsigned>
-MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
- cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
+ MaxVFOption("slp-max-vf", cl::init(192), cl::Hidden,
+ cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
/// Limits the size of scheduling regions in a block.
/// It avoid long compile times for _very_ large blocks where vector
@@ -4135,7 +4135,7 @@ static bool areTwoInsertFromSameBuildVector(
// Go through the vector operand of insertelement instructions trying to find
// either VU as the original vector for IE2 or V as the original vector for
// IE1.
- SmallSet<int, 8> ReusedIdx;
+ SmallDenseSet<int, 8> ReusedIdx;
bool IsReusedIdx = false;
do {
if (IE2 == VU && !IE1)
>From 0e84649469087b93ef6e6241599220c6ff01989a Mon Sep 17 00:00:00 2001
From: Dmitriy Smirnov <dmitriy.smirnov at arm.com>
Date: Tue, 31 Oct 2023 14:50:44 +0000
Subject: [PATCH 2/2] [flang] [stack-arrays] Performance fix
Added a check that prevents AllocationAnalysis from visiting an operation more than once.
---
.../lib/Optimizer/Transforms/StackArrays.cpp | 6 +++
flang/test/Transforms/if.fir | 39 +++++++++++++++++++
2 files changed, 45 insertions(+)
create mode 100644 flang/test/Transforms/if.fir
diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index 9b90aed5a17ae73..41e5dafd04e71bb 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -154,6 +154,9 @@ class AllocationAnalysis
/// Visit control flow operations and decide whether to call visitOperation
/// to apply the transfer function
void processOperation(mlir::Operation *op) override;
+
+private:
+ llvm::DenseSet<mlir::Operation *> visited;
};
/// Drives analysis to find candidate fir.allocmem operations which could be
@@ -326,6 +329,9 @@ std::optional<AllocationState> LatticePoint::get(mlir::Value val) const {
void AllocationAnalysis::visitOperation(mlir::Operation *op,
const LatticePoint &before,
LatticePoint *after) {
+ if (!visited.insert(op).second)
+ return;
+
LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op
<< "\n");
LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");
diff --git a/flang/test/Transforms/if.fir b/flang/test/Transforms/if.fir
new file mode 100644
index 000000000000000..abddd682986ea57
--- /dev/null
+++ b/flang/test/Transforms/if.fir
@@ -0,0 +1,39 @@
+// RUN: fir-opt --stack-arrays --debug-only=stack-arrays %s 2>&1 | grep -v '\-\-' | FileCheck %s
+
+// Check the data-flow-analysis can detect cases where we aren't sure if memory
+// is freed by the end of the function
+func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
+ %7 = arith.constant 42 : index
+ %8 = fir.allocmem !fir.array<?xi32>, %7 {uniq_name = "_QFdfa1Earr.alloc"}
+ %9 = fir.load %arg0 : !fir.ref<!fir.logical<4>>
+ %10 = fir.convert %9 : (!fir.logical<4>) -> i1
+ fir.if %10 {
+ fir.freemem %8 : !fir.heap<!fir.array<?xi32>>
+ } else {
+ }
+ return
+}
+
+// 8 visits:
+// CHECK: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: module {
+// CHECK-NEXT: func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
+// CHECK-NEXT: %[[C42:.*]] = arith.constant 42 : index
+// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[C42]] {uniq_name = "_QFdfa1Earr.alloc"}
+// CHECK-NEXT: %[[LOGICAL:.*]] = fir.load %arg0 : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT: %[[BOOL:.*]] = fir.convert %[[LOGICAL]] : (!fir.logical<4>) -> i1
+// CHECK-NEXT: fir.if %[[BOOL]] {
+// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT: } else {
+// CHECK-NEXT: }
+// CHECK-NEXT: return
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+
More information about the llvm-commits
mailing list