[flang-commits] [flang] [mlir] [mlir][LLVMIR][OpenMP] Move reduction allocas before stores (PR #105683)
Tom Eccles via flang-commits
flang-commits at lists.llvm.org
Tue Aug 27 03:18:10 PDT 2024
https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/105683
>From 9d21fb8f37dbeb36015546907e94f2281d95b7c9 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 22 Aug 2024 14:20:38 +0000
Subject: [PATCH 1/2] [mlir][LLVMIR][OpenMP] Move reduction allocas before
stores
Delay implicit reduction var stores until after all allocas. This fixes
a couple of cases in existing unit tests.
---
.../Lower/OpenMP/parallel-reduction-mixed.f90 | 4 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 53 +++++++++++--------
.../openmp-reduction-array-sections.mlir | 2 +-
3 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
index 1457be05ca1025..262075ec9b25d0 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
@@ -26,10 +26,10 @@ end subroutine proc
!CHECK: store i32 %[[TID]], ptr %[[TID_LOCAL]]
!CHECK: %[[F_priv:.*]] = alloca ptr
!CHECK: %[[I_priv:.*]] = alloca i32
-!CHECK: store ptr %{{.*}}, ptr %[[F_priv]]
!CHECK: omp.reduction.init:
-!CHECK: store i32 0, ptr %[[I_priv]]
+!CHECK: store ptr %{{.*}}, ptr %[[F_priv]]
+!CHECK: store i32 0, ptr %[[I_priv]]
!CHECK: omp.par.region:
!CHECK: br label %[[MALLOC_BB:.*]]
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6d14d77c440e67..b64bd7d45a7705 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -593,22 +593,23 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
}
/// Allocate space for privatized reduction variables.
+/// `deferredStores` contains information to create store operations which needs
+/// to be inserted after all allocas
template <typename T>
-static LogicalResult
-allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
- llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
- SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
- SmallVectorImpl<llvm::Value *> &privateReductionVariables,
- DenseMap<Value, llvm::Value *> &reductionVariableMap,
- llvm::ArrayRef<bool> isByRefs) {
+static LogicalResult allocReductionVars(
+ T loop, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap,
+ SmallVectorImpl<std::pair<llvm::Value *, llvm::Value *>> &deferredStores,
+ llvm::ArrayRef<bool> isByRefs) {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
// delay creating stores until after all allocas
- SmallVector<std::pair<llvm::Value *, llvm::Value *>> storesToCreate;
- storesToCreate.reserve(loop.getNumReductionVars());
+ deferredStores.reserve(loop.getNumReductionVars());
for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
Region &allocRegion = reductionDecls[i].getAllocRegion();
@@ -628,7 +629,7 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
// variable allocated in the inlined region)
llvm::Value *var = builder.CreateAlloca(
moduleTranslation.convertType(reductionDecls[i].getType()));
- storesToCreate.emplace_back(phis[0], var);
+ deferredStores.emplace_back(phis[0], var);
privateReductionVariables[i] = var;
moduleTranslation.mapValue(reductionArgs[i], phis[0]);
@@ -644,10 +645,6 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
}
}
- // TODO: further delay this so it doesn't come in the entry block at all
- for (auto [data, addr] : storesToCreate)
- builder.CreateStore(data, addr);
-
return success();
}
@@ -819,12 +816,19 @@ static LogicalResult allocAndInitializeReductionVars(
if (op.getNumReductionVars() == 0)
return success();
+ SmallVector<std::pair<llvm::Value *, llvm::Value *>> deferredStores;
+
if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
allocaIP, reductionDecls,
privateReductionVariables, reductionVariableMap,
- isByRef)))
+ deferredStores, isByRef)))
return failure();
+ // store result of the alloc region to the allocated pointer to the real
+ // reduction variable
+ for (auto [data, addr] : deferredStores)
+ builder.CreateStore(data, addr);
+
// Before the loop, store the initial values of reductions into reduction
// variables. Although this could be done after allocas, we don't want to mess
// up with the alloca insertion point.
@@ -1359,6 +1363,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
collectReductionDecls(opInst, reductionDecls);
SmallVector<llvm::Value *> privateReductionVariables(
opInst.getNumReductionVars());
+ SmallVector<std::pair<llvm::Value *, llvm::Value *>> deferredStores;
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
// Allocate reduction vars
@@ -1373,10 +1378,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
InsertPointTy(allocaIP.getBlock(),
allocaIP.getBlock()->getTerminator()->getIterator());
- if (failed(allocReductionVars(opInst, reductionArgs, builder,
- moduleTranslation, allocaIP, reductionDecls,
- privateReductionVariables,
- reductionVariableMap, isByRef)))
+ if (failed(allocReductionVars(
+ opInst, reductionArgs, builder, moduleTranslation, allocaIP,
+ reductionDecls, privateReductionVariables, reductionVariableMap,
+ deferredStores, isByRef)))
bodyGenStatus = failure();
// Initialize reduction vars
@@ -1401,6 +1406,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
+ // insert stores deferred until after all allocas
+ // these store the results of the alloc region into the allocation for the
+ // pointer to the reduction variable
+ for (auto [data, addr] : deferredStores)
+ builder.CreateStore(data, addr);
+
for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
index da6f9430046123..2d8a13ccd2a1f5 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
@@ -89,7 +89,6 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
// CHECK: %[[VAL_13:.*]] = load i32, ptr %[[VAL_10]], align 4
// CHECK: %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8
-// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
// CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8
// CHECK: br label %[[VAL_15:.*]]
// CHECK: omp.reduction.init: ; preds = %[[VAL_16:.*]]
@@ -98,6 +97,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
// CHECK: br label %[[VAL_18:.*]]
// CHECK: omp.par.region1: ; preds = %[[VAL_17]]
// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
+// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
// CHECK: br label %[[VAL_22:.*]]
// CHECK: omp_section_loop.preheader: ; preds = %[[VAL_18]]
// CHECK: store i32 0, ptr %[[VAL_7]], align 4
>From 52c59f27c8e24835088c8b9937a2b81713fa1833 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 27 Aug 2024 10:16:01 +0000
Subject: [PATCH 2/2] Define a structure holding defered store args
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 34 +++++++++++++------
1 file changed, 23 insertions(+), 11 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index b64bd7d45a7705..e1c3dbe60de4b3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -592,19 +592,31 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
return bodyGenStatus;
}
+namespace {
+/// Contains the arguments for an LLVM store operation
+struct DeferredStore {
+ DeferredStore(llvm::Value *value, llvm::Value *address)
+ : value(value), address(address) {}
+
+ llvm::Value *value;
+ llvm::Value *address;
+};
+} // namespace
+
/// Allocate space for privatized reduction variables.
/// `deferredStores` contains information to create store operations which needs
/// to be inserted after all allocas
template <typename T>
-static LogicalResult allocReductionVars(
- T loop, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
- SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
- SmallVectorImpl<llvm::Value *> &privateReductionVariables,
- DenseMap<Value, llvm::Value *> &reductionVariableMap,
- SmallVectorImpl<std::pair<llvm::Value *, llvm::Value *>> &deferredStores,
- llvm::ArrayRef<bool> isByRefs) {
+static LogicalResult
+allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
+ llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap,
+ SmallVectorImpl<DeferredStore> &deferredStores,
+ llvm::ArrayRef<bool> isByRefs) {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
@@ -816,7 +828,7 @@ static LogicalResult allocAndInitializeReductionVars(
if (op.getNumReductionVars() == 0)
return success();
- SmallVector<std::pair<llvm::Value *, llvm::Value *>> deferredStores;
+ SmallVector<DeferredStore> deferredStores;
if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
allocaIP, reductionDecls,
@@ -1363,7 +1375,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
collectReductionDecls(opInst, reductionDecls);
SmallVector<llvm::Value *> privateReductionVariables(
opInst.getNumReductionVars());
- SmallVector<std::pair<llvm::Value *, llvm::Value *>> deferredStores;
+ SmallVector<DeferredStore> deferredStores;
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
// Allocate reduction vars
More information about the flang-commits
mailing list