[Mlir-commits] [flang] [mlir] [mlir][LLVMIR][OpenMP] Move reduction allocas before stores (PR #105683)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu Aug 22 08:57:31 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-fir-hlfir

Author: Tom Eccles (tblah)

<details>
<summary>Changes</summary>

Delay implicit reduction var stores until after all allocas. This fixes a couple of cases in existing unit tests.

Follow up to https://github.com/llvm/llvm-project/pull/102524

---
Full diff: https://github.com/llvm/llvm-project/pull/105683.diff


3 Files Affected:

- (modified) flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 (+2-2) 
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+32-21) 
- (modified) mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir (+1-1) 


``````````diff
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
index 1457be05ca1025..262075ec9b25d0 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
@@ -26,10 +26,10 @@ end subroutine proc
 !CHECK:  store i32 %[[TID]], ptr %[[TID_LOCAL]]
 !CHECK:  %[[F_priv:.*]] = alloca ptr
 !CHECK:  %[[I_priv:.*]] = alloca i32
-!CHECK:  store ptr %{{.*}}, ptr %[[F_priv]]
 
 !CHECK: omp.reduction.init:
-!CHECK: store i32 0, ptr %[[I_priv]]
+!CHECK:  store ptr %{{.*}}, ptr %[[F_priv]]
+!CHECK:  store i32 0, ptr %[[I_priv]]
 
 !CHECK: omp.par.region:
 !CHECK:  br label %[[MALLOC_BB:.*]]
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6d14d77c440e67..b64bd7d45a7705 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -593,22 +593,23 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
 }
 
 /// Allocate space for privatized reduction variables.
+/// `deferredStores` contains information to create store operations which needs
+/// to be inserted after all allocas
 template <typename T>
-static LogicalResult
-allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
-                   llvm::IRBuilderBase &builder,
-                   LLVM::ModuleTranslation &moduleTranslation,
-                   llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
-                   SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
-                   SmallVectorImpl<llvm::Value *> &privateReductionVariables,
-                   DenseMap<Value, llvm::Value *> &reductionVariableMap,
-                   llvm::ArrayRef<bool> isByRefs) {
+static LogicalResult allocReductionVars(
+    T loop, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
+    LLVM::ModuleTranslation &moduleTranslation,
+    llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+    SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+    SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+    DenseMap<Value, llvm::Value *> &reductionVariableMap,
+    SmallVectorImpl<std::pair<llvm::Value *, llvm::Value *>> &deferredStores,
+    llvm::ArrayRef<bool> isByRefs) {
   llvm::IRBuilderBase::InsertPointGuard guard(builder);
   builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
 
   // delay creating stores until after all allocas
-  SmallVector<std::pair<llvm::Value *, llvm::Value *>> storesToCreate;
-  storesToCreate.reserve(loop.getNumReductionVars());
+  deferredStores.reserve(loop.getNumReductionVars());
 
   for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
     Region &allocRegion = reductionDecls[i].getAllocRegion();
@@ -628,7 +629,7 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
       // variable allocated in the inlined region)
       llvm::Value *var = builder.CreateAlloca(
           moduleTranslation.convertType(reductionDecls[i].getType()));
-      storesToCreate.emplace_back(phis[0], var);
+      deferredStores.emplace_back(phis[0], var);
 
       privateReductionVariables[i] = var;
       moduleTranslation.mapValue(reductionArgs[i], phis[0]);
@@ -644,10 +645,6 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
     }
   }
 
-  // TODO: further delay this so it doesn't come in the entry block at all
-  for (auto [data, addr] : storesToCreate)
-    builder.CreateStore(data, addr);
-
   return success();
 }
 
@@ -819,12 +816,19 @@ static LogicalResult allocAndInitializeReductionVars(
   if (op.getNumReductionVars() == 0)
     return success();
 
+  SmallVector<std::pair<llvm::Value *, llvm::Value *>> deferredStores;
+
   if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
                                 allocaIP, reductionDecls,
                                 privateReductionVariables, reductionVariableMap,
-                                isByRef)))
+                                deferredStores, isByRef)))
     return failure();
 
+  // store result of the alloc region to the allocated pointer to the real
+  // reduction variable
+  for (auto [data, addr] : deferredStores)
+    builder.CreateStore(data, addr);
+
   // Before the loop, store the initial values of reductions into reduction
   // variables. Although this could be done after allocas, we don't want to mess
   // up with the alloca insertion point.
@@ -1359,6 +1363,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
   collectReductionDecls(opInst, reductionDecls);
   SmallVector<llvm::Value *> privateReductionVariables(
       opInst.getNumReductionVars());
+  SmallVector<std::pair<llvm::Value *, llvm::Value *>> deferredStores;
 
   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
     // Allocate reduction vars
@@ -1373,10 +1378,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
         InsertPointTy(allocaIP.getBlock(),
                       allocaIP.getBlock()->getTerminator()->getIterator());
 
-    if (failed(allocReductionVars(opInst, reductionArgs, builder,
-                                  moduleTranslation, allocaIP, reductionDecls,
-                                  privateReductionVariables,
-                                  reductionVariableMap, isByRef)))
+    if (failed(allocReductionVars(
+            opInst, reductionArgs, builder, moduleTranslation, allocaIP,
+            reductionDecls, privateReductionVariables, reductionVariableMap,
+            deferredStores, isByRef)))
       bodyGenStatus = failure();
 
     // Initialize reduction vars
@@ -1401,6 +1406,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
 
     builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
 
+    // insert stores deferred until after all allocas
+    // these store the results of the alloc region into the allocation for the
+    // pointer to the reduction variable
+    for (auto [data, addr] : deferredStores)
+      builder.CreateStore(data, addr);
+
     for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
       SmallVector<llvm::Value *> phis;
 
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
index da6f9430046123..2d8a13ccd2a1f5 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
@@ -89,7 +89,6 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
 // CHECK:         %[[VAL_13:.*]] = load i32, ptr %[[VAL_10]], align 4
 // CHECK:         %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
 // CHECK:         %[[VAL_21:.*]] = alloca ptr, align 8
-// CHECK:         store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
 // CHECK:         %[[VAL_14:.*]] = alloca [1 x ptr], align 8
 // CHECK:         br label %[[VAL_15:.*]]
 // CHECK:       omp.reduction.init:                               ; preds = %[[VAL_16:.*]]
@@ -98,6 +97,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
 // CHECK:         br label %[[VAL_18:.*]]
 // CHECK:       omp.par.region1:                                  ; preds = %[[VAL_17]]
 // CHECK:         %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
+// CHECK:         store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
 // CHECK:         br label %[[VAL_22:.*]]
 // CHECK:       omp_section_loop.preheader:                       ; preds = %[[VAL_18]]
 // CHECK:         store i32 0, ptr %[[VAL_7]], align 4

``````````

</details>


https://github.com/llvm/llvm-project/pull/105683


More information about the Mlir-commits mailing list