[llvm] [flang] Stack array visit (PR #70816)

Tue Oct 31 08:06:57 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Dmitriy Smirnov (d-smirnov)

<details>
<summary>Changes</summary>

This PR fixes a compile-time performance degradation observed on 521.wrf_r with -Ofast.

---
Full diff: https://github.com/llvm/llvm-project/pull/70816.diff


3 Files Affected:

- (modified) flang/lib/Optimizer/Transforms/StackArrays.cpp (+6) 
- (added) flang/test/Transforms/if.fir (+39) 
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+3-3) 


``````````diff

diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index 9b90aed5a17ae73..41e5dafd04e71bb 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -154,6 +154,9 @@ class AllocationAnalysis
   /// Visit control flow operations and decide whether to call visitOperation
   /// to apply the transfer function
   void processOperation(mlir::Operation *op) override;
+
+private:
+  llvm::DenseSet<mlir::Operation *> visited;
 };
 
 /// Drives analysis to find candidate fir.allocmem operations which could be
@@ -326,6 +329,9 @@ std::optional<AllocationState> LatticePoint::get(mlir::Value val) const {
 void AllocationAnalysis::visitOperation(mlir::Operation *op,
                                         const LatticePoint &before,
                                         LatticePoint *after) {
+  if (!visited.insert(op).second)
+    return;
+
   LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op
                           << "\n");
   LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");
diff --git a/flang/test/Transforms/if.fir b/flang/test/Transforms/if.fir
new file mode 100644
index 000000000000000..abddd682986ea57
--- /dev/null
+++ b/flang/test/Transforms/if.fir
@@ -0,0 +1,39 @@
+// RUN: fir-opt --stack-arrays --debug-only=stack-arrays %s 2>&1 | grep -v '\-\-' | FileCheck %s
+
+// Check the data-flow-analysis can detect cases where we aren't sure if memory
+// is freed by the end of the function
+func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
+ %7 = arith.constant 42 : index
+ %8 = fir.allocmem !fir.array<?xi32>, %7 {uniq_name = "_QFdfa1Earr.alloc"}
+ %9 = fir.load %arg0 : !fir.ref<!fir.logical<4>>
+ %10 = fir.convert %9 : (!fir.logical<4>) -> i1
+ fir.if %10 {
+   fir.freemem %8 : !fir.heap<!fir.array<?xi32>>
+ } else {
+ }
+ return
+}
+
+// 8 visits:
+// CHECK: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: module {
+// CHECK-NEXT:   func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
+// CHECK-NEXT:   %[[C42:.*]] = arith.constant 42 : index
+// CHECK-NEXT:   %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[C42]] {uniq_name = "_QFdfa1Earr.alloc"}
+// CHECK-NEXT:   %[[LOGICAL:.*]] = fir.load %arg0 : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:   %[[BOOL:.*]] = fir.convert %[[LOGICAL]] : (!fir.logical<4>) -> i1
+// CHECK-NEXT:   fir.if %[[BOOL]] {
+// CHECK-NEXT:     fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:   } else {
+// CHECK-NEXT:   }
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b6895c649f838c1..1cc6248caa76e44 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -138,8 +138,8 @@ MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
     cl::desc("Attempt to vectorize for this register size in bits"));
 
 static cl::opt<unsigned>
-MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
-    cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
+    MaxVFOption("slp-max-vf", cl::init(192), cl::Hidden,
+                cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
 
 /// Limits the size of scheduling regions in a block.
 /// It avoid long compile times for _very_ large blocks where vector
@@ -4135,7 +4135,7 @@ static bool areTwoInsertFromSameBuildVector(
   // Go through the vector operand of insertelement instructions trying to find
   // either VU as the original vector for IE2 or V as the original vector for
   // IE1.
-  SmallSet<int, 8> ReusedIdx;
+  SmallDenseSet<int, 8> ReusedIdx;
   bool IsReusedIdx = false;
   do {
     if (IE2 == VU && !IE1)

``````````

</details>


https://github.com/llvm/llvm-project/pull/70816